bsd/kern/uipc_usrreq.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  61  */
  62 /*
  63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  64  * support for mandatory and extensible security protections.  This notice
  65  * is included in support of clause 2.2 (b) of the Apple Public License,
  66  * Version 2.0.
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/kernel.h>
  72 #include <sys/domain.h>
  73 #include <sys/fcntl.h>
  74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
  75 #include <sys/file_internal.h>
  76 #include <sys/filedesc.h>
  77 #include <sys/lock.h>
  78 #include <sys/mbuf.h>
  79 #include <sys/namei.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/protosw.h>
  83 #include <sys/socket.h>
  84 #include <sys/socketvar.h>
  85 #include <sys/stat.h>
  86 #include <sys/sysctl.h>
  87 #include <sys/un.h>
  88 #include <sys/unpcb.h>
  89 #include <sys/vnode_internal.h>
  90 #include <sys/kdebug.h>
  91
  92 #include <kern/zalloc.h>
  93 #include <kern/locks.h>
  94
  95 #if CONFIG_MACF_SOCKET
  96 #include <security/mac_framework.h>
  97 #endif /* MAC_SOCKET */
  98
  99 #define f_msgcount f_fglob->fg_msgcount	/* shorthand: the f_* names below forward to the fg_* members of f_fglob */
 100 #define f_cred f_fglob->fg_cred
 101 #define f_ops f_fglob->fg_ops
 102 #define f_offset f_fglob->fg_offset
 103 #define f_data f_fglob->fg_data
 104 struct  zone *unp_zone;	/* zone that unp_attach() allocates struct unpcb from */
 105 static  unp_gen_t unp_gencnt;	/* generation counter, bumped on every attach and detach */
 106 static  u_int unp_count;	/* incremented in unp_attach(), decremented in unp_detach() */
 107
 108 static  lck_attr_t              *unp_mtx_attr;
 109 static  lck_grp_t               *unp_mtx_grp;
 110 static  lck_grp_attr_t          *unp_mtx_grp_attr;
 111 static  lck_rw_t                *unp_list_mtx;	/* rw lock taken exclusive around pcb list insert/remove */
 112
 113 extern lck_mtx_t *uipc_lock;
 114 static  struct unp_head unp_shead, unp_dhead;	/* pcb lists: unp_shead for non-datagram (stream), unp_dhead for SOCK_DGRAM */
 115
 116 /*
 117  * Unix communications domain.
 118  *
 119  * TODO:
 120  *      SEQPACKET, RDM
 121  *      rethink name space problems
 122  *      need a proper out-of-band
 123  *      lock pushdown
 124  */
 125 static struct   sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };	/* address handed out when no bound name is available */
 126 static ino_t    unp_ino;                /* next fake inode number; assigned and post-incremented in uipc_sense() */
 127
 128 static int      unp_attach(struct socket *);
 129 static void     unp_detach(struct unpcb *);
 130 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
 131 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
 132 static void     unp_disconnect(struct unpcb *);
 133 static void     unp_shutdown(struct unpcb *);
 134 static void     unp_drop(struct unpcb *, int);
 135 __private_extern__ void unp_gc(void);	/* called from unp_detach() when unp_rights is non-zero */
 136 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *));
 137 static void     unp_mark(struct fileglob *);
 138 static void     unp_discard(struct fileglob *);
 139 static void     unp_discard_fdlocked(struct fileglob *, proc_t);
 140 static int      unp_internalize(struct mbuf *, proc_t);	/* applied to control mbufs in uipc_send() */
 141 static int      unp_listen(struct unpcb *, proc_t);
 142
 143 /* TODO: this should be in header file */
 144 extern int fdgetf_noref(proc_t, int, struct fileproc **);
 145
 146 static int
 147 uipc_abort(struct socket *so)
 148 {
 149         struct unpcb *unp = sotounpcb(so);
 150
 151         if (unp == 0)
 152                 return (EINVAL);
 153         unp_drop(unp, ECONNABORTED);
 154         unp_detach(unp);
 155         sofree(so);
 156         return (0);
 157 }
 158
 159 static int
 160 uipc_accept(struct socket *so, struct sockaddr **nam)
 161 {
 162         struct unpcb *unp = sotounpcb(so);
 163
 164         if (unp == 0)
 165                 return (EINVAL);
 166
 167         /*
 168          * Pass back name of connected socket,
 169          * if it was bound and we are still connected
 170          * (our peer may have closed already!).
 171          */
 172         if (unp->unp_conn && unp->unp_conn->unp_addr) {
 173                 *nam = dup_sockaddr((struct sockaddr *)
 174                     unp->unp_conn->unp_addr, 1);
 175         } else {
 176                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 177         }
 178         return (0);
 179 }
 180
 181 /*
 182  * Returns:     0                       Success
 183  *              EISCONN
 184  *      unp_attach:
 185  */
 186 static int
 187 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
 188 {
 189         struct unpcb *unp = sotounpcb(so);
 190
 191         if (unp != 0)
 192                 return (EISCONN);
 193         return (unp_attach(so));
 194 }
 195
 196 static int
 197 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
 198 {
 199         struct unpcb *unp = sotounpcb(so);
 200
 201         if (unp == 0)
 202                 return (EINVAL);
 203
 204         return (unp_bind(unp, nam, p));
 205 }
 206
 207 /*
 208  * Returns:     0                       Success
 209  *              EINVAL
 210  *      unp_connect:???                 [See elsewhere in this file]
 211  */
 212 static int
 213 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
 214 {
 215         struct unpcb *unp = sotounpcb(so);
 216
 217         if (unp == 0)
 218                 return (EINVAL);
 219         return (unp_connect(so, nam, p));
 220 }
 221
 222 /*
 223  * Returns:     0                       Success
 224  *              EINVAL
 225  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 226  *      unp_connect2:EINVAL             Invalid argument
 227  */
 228 static int
 229 uipc_connect2(struct socket *so1, struct socket *so2)
 230 {
 231         struct unpcb *unp = sotounpcb(so1);
 232
 233         if (unp == 0)
 234                 return (EINVAL);
 235
 236         return (unp_connect2(so1, so2));
 237 }
 238
 239 /* control is EOPNOTSUPP */
 240
 241 static int
 242 uipc_detach(struct socket *so)
 243 {
 244         struct unpcb *unp = sotounpcb(so);
 245
 246         if (unp == 0)
 247                 return (EINVAL);
 248
 249         unp_detach(unp);
 250         return (0);
 251 }
 252
 253 static int
 254 uipc_disconnect(struct socket *so)
 255 {
 256         struct unpcb *unp = sotounpcb(so);
 257
 258         if (unp == 0)
 259                 return (EINVAL);
 260         unp_disconnect(unp);
 261         return (0);
 262 }
 263
 264 /*
 265  * Returns:     0                       Success
 266  *              EINVAL
 267  */
 268 static int
 269 uipc_listen(struct socket *so, __unused proc_t p)
 270 {
 271         struct unpcb *unp = sotounpcb(so);
 272
 273         if (unp == 0 || unp->unp_vnode == 0)
 274                 return (EINVAL);
 275         return (unp_listen(unp, p));
 276 }
 277
 278 static int
 279 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 280 {
 281         struct unpcb *unp = sotounpcb(so);
 282
 283         if (unp == NULL)
 284                 return (EINVAL);
 285         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
 286                 *nam = dup_sockaddr((struct sockaddr *)
 287                     unp->unp_conn->unp_addr, 1);
 288         } else {
 289                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 290         }
 291         return (0);
 292 }
 293
 294 static int
 295 uipc_rcvd(struct socket *so, __unused int flags)
 296 {
 297         struct unpcb *unp = sotounpcb(so);
 298         struct socket *so2;
 299
 300         if (unp == 0)
 301                 return (EINVAL);
 302         switch (so->so_type) {
 303         case SOCK_DGRAM:
 304                 panic("uipc_rcvd DGRAM?");
 305                 /*NOTREACHED*/
 306
 307         case SOCK_STREAM:
 308 #define rcv (&so->so_rcv)
 309 #define snd (&so2->so_snd)
 310                 if (unp->unp_conn == 0)
 311                         break;
 312                 so2 = unp->unp_conn->unp_socket;
 313                 /*
 314                  * Adjust backpressure on sender
 315                  * and wakeup any waiting to write.
 316                  */
 317                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
 318                 unp->unp_mbcnt = rcv->sb_mbcnt;
 319                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
 320                 unp->unp_cc = rcv->sb_cc;
 321                 sowwakeup(so2);
 322 #undef snd
 323 #undef rcv
 324                 break;
 325
 326         default:
 327                 panic("uipc_rcvd unknown socktype");
 328         }
 329         return (0);
 330 }
 331
 332 /* pru_rcvoob is EOPNOTSUPP */
 333
 334 /*
 335  * Returns:     0                       Success
 336  *              EINVAL
 337  *              EOPNOTSUPP
 338  *              EPIPE
 339  *              ENOTCONN
 340  *              EISCONN
 341  *      unp_internalize:EINVAL
 342  *      unp_internalize:EBADF
 343  *      unp_connect:EAFNOSUPPORT        Address family not supported
 344  *      unp_connect:EINVAL              Invalid argument
 345  *      unp_connect:ENOTSOCK            Not a socket
 346  *      unp_connect:ECONNREFUSED        Connection refused
 347  *      unp_connect:EISCONN             Socket is connected
 348  *      unp_connect:EPROTOTYPE          Protocol wrong type for socket
 349  *      unp_connect:???
 350  *      sbappendaddr:ENOBUFS            [5th argument, contents modified]
 351  *      sbappendaddr:???                [whatever a filter author chooses]
 352  */
 353 static int
 354 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 355     struct mbuf *control, proc_t p)
 356 {
 357         int error = 0;
 358         struct unpcb *unp = sotounpcb(so);
 359         struct socket *so2;
 360
 361         if (unp == 0) {
 362                 error = EINVAL;
 363                 goto release;
 364         }
 365         if (flags & PRUS_OOB) {
 366                 error = EOPNOTSUPP;
 367                 goto release;
 368         }
 369
 370         if (control) {
 371                 /* release global lock to avoid deadlock (4436174) */
 372                 socket_unlock(so, 0);
 373                 error = unp_internalize(control, p);
 374                 socket_lock(so, 0);
 375                 if (error)
 376                         goto release;
 377         }
 378
 379         switch (so->so_type) {
 380         case SOCK_DGRAM:
 381         {
 382                 struct sockaddr *from;
 383
 384                 if (nam) {
 385                         if (unp->unp_conn) {
 386                                 error = EISCONN;
 387                                 break;
 388                         }
 389                         error = unp_connect(so, nam, p);
 390                         if (error)
 391                                 break;
 392                 } else {
 393                         if (unp->unp_conn == 0) {
 394                                 error = ENOTCONN;
 395                                 break;
 396                         }
 397                 }
 398                 so2 = unp->unp_conn->unp_socket;
 399                 if (unp->unp_addr)
 400                         from = (struct sockaddr *)unp->unp_addr;
 401                 else
 402                         from = &sun_noname;
 403                 /*
 404                  * sbappendaddr() will fail when the receiver runs out of
 405                  * space; in contrast to SOCK_STREAM, we will lose messages
 406                  * for the SOCK_DGRAM case when the receiver's queue overflows.
 407                  * SB_UNIX on the socket buffer implies that the callee will
 408                  * not free the control message, if any, because we would need
 409                  * to call unp_dispose() on it.
 410                  */
 411                 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
 412                         control = NULL;
 413                         sorwakeup(so2);
 414                 } else if (control != NULL && error == 0) {
 415                         /* A socket filter took control; don't touch it */
 416                         control = NULL;
 417                 }
 418                 m = NULL;
 419                 if (nam)
 420                         unp_disconnect(unp);
 421                 break;
 422         }
 423
 424         case SOCK_STREAM: {
 425                 int didreceive = 0;
 426 #define rcv (&so2->so_rcv)
 427 #define snd (&so->so_snd)
 428                 /* Connect if not connected yet. */
 429                 /*
 430                  * Note: A better implementation would complain
 431                  * if not equal to the peer's address.
 432                  */
 433                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 434                         if (nam) {
 435                                 error = unp_connect(so, nam, p);
 436                                 if (error)
 437                                         break;  /* XXX */
 438                         } else {
 439                                 error = ENOTCONN;
 440                                 break;
 441                         }
 442                 }
 443
 444                 if (so->so_state & SS_CANTSENDMORE) {
 445                         error = EPIPE;
 446                         break;
 447                 }
 448                 if (unp->unp_conn == 0)
 449                         panic("uipc_send connected but no connection?");
 450                 so2 = unp->unp_conn->unp_socket;
 451                 /*
 452                  * Send to paired receive port, and then reduce send buffer
 453                  * hiwater marks to maintain backpressure.  Wake up readers.
 454                  * SB_UNIX flag will allow new record to be appended to the
 455                  * receiver's queue even when it is already full.  It is
 456                  * possible, however, that append might fail.  In that case,
 457                  * we will need to call unp_dispose() on the control message;
 458                  * the callee will not free it since SB_UNIX is set.
 459                  */
 460                 didreceive = control ?
 461                     sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);
 462
 463                 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
 464                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
 465                 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
 466                 unp->unp_conn->unp_cc = rcv->sb_cc;
 467                 if (didreceive) {
 468                         control = NULL;
 469                         sorwakeup(so2);
 470                 } else if (control != NULL && error == 0) {
 471                         /* A socket filter took control; don't touch it */
 472                         control = NULL;
 473                 }
 474                 m = NULL;
 475 #undef snd
 476 #undef rcv
 477                 }
 478                 break;
 479
 480         default:
 481                 panic("uipc_send unknown socktype");
 482         }
 483
 484         /*
 485          * SEND_EOF is equivalent to a SEND followed by
 486          * a SHUTDOWN.
 487          */
 488         if (flags & PRUS_EOF) {
 489                 socantsendmore(so);
 490                 unp_shutdown(unp);
 491         }
 492
 493         if (control && error != 0) {
 494                 socket_unlock(so, 0);
 495                 unp_dispose(control);
 496                 socket_lock(so, 0);
 497         }
 498
 499 release:
 500         if (control)
 501                 m_freem(control);
 502         if (m)
 503                 m_freem(m);
 504         return (error);
 505 }
 506
 507 static int
 508 uipc_sense(struct socket *so, void *ub, int isstat64)
 509 {
 510         struct unpcb *unp = sotounpcb(so);
 511         struct socket *so2;
 512         blksize_t blksize;
 513
 514         if (unp == 0)
 515                 return (EINVAL);
 516
 517         blksize = so->so_snd.sb_hiwat;
 518         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 519                 so2 = unp->unp_conn->unp_socket;
 520                 blksize += so2->so_rcv.sb_cc;
 521         }
 522         if (unp->unp_ino == 0)
 523                 unp->unp_ino = unp_ino++;
 524
 525         if (isstat64 != 0) {
 526                 struct stat64  *sb64;
 527
 528                 sb64 = (struct stat64 *)ub;
 529                 sb64->st_blksize = blksize;
 530                 sb64->st_dev = NODEV;
 531                 sb64->st_ino = (ino64_t)unp->unp_ino;
 532         } else {
 533                 struct stat *sb;
 534
 535                 sb = (struct stat *)ub;
 536                 sb->st_blksize = blksize;
 537                 sb->st_dev = NODEV;
 538                 sb->st_ino = (ino_t)unp->unp_ino;
 539         }
 540
 541         return (0);
 542 }
 543
 544 /*
 545  * Returns:     0               Success
 546  *              EINVAL
 547  *
 548  * Notes:       This is not strictly correct, as unp_shutdown() also calls
 549  *              socantrcvmore().  These should maybe both be conditionalized
 550  *              on the 'how' argument in soshutdown() as called from the
 551  *              shutdown() system call.
 552  */
 553 static int
 554 uipc_shutdown(struct socket *so)
 555 {
 556         struct unpcb *unp = sotounpcb(so);
 557
 558         if (unp == 0)
 559                 return (EINVAL);
 560         socantsendmore(so);
 561         unp_shutdown(unp);
 562         return (0);
 563 }
 564
 565 /*
 566  * Returns:     0                       Success
 567  *              EINVAL                  Invalid argument
 568  */
 569 static int
 570 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 571 {
 572         struct unpcb *unp = sotounpcb(so);
 573
 574         if (unp == NULL)
 575                 return (EINVAL);
 576         if (unp->unp_addr != NULL) {
 577                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
 578         } else {
 579                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 580         }
 581         return (0);
 582 }
 583
 584 struct pr_usrreqs uipc_usrreqs = {
 585         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
 586         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
 587         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
 588         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
 589         sosend, soreceive, pru_sopoll_notsupp
 590 };
 591
 592 int
 593 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 594 {
 595         struct unpcb *unp = sotounpcb(so);
 596         int error;
 597
 598         switch (sopt->sopt_dir) {
 599         case SOPT_GET:
 600                 switch (sopt->sopt_name) {
 601                 case LOCAL_PEERCRED:
 602                         if (unp->unp_flags & UNP_HAVEPC) {
 603                                 error = sooptcopyout(sopt, &unp->unp_peercred,
 604                                     sizeof (unp->unp_peercred));
 605                         } else {
 606                                 if (so->so_type == SOCK_STREAM)
 607                                         error = ENOTCONN;
 608                                 else
 609                                         error = EINVAL;
 610                         }
 611                         break;
 612                 default:
 613                         error = EOPNOTSUPP;
 614                         break;
 615                 }
 616                 break;
 617         case SOPT_SET:
 618         default:
 619                 error = EOPNOTSUPP;
 620                 break;
 621         }
 622         return (error);
 623 }
 624
 625 /*
 626  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 627  * for stream sockets, although the total for sender and receiver is
 628  * actually only PIPSIZ.
 629  * Datagram sockets really use the sendspace as the maximum datagram size,
 630  * and don't really want to reserve the sendspace.  Their recvspace should
 631  * be large enough for at least one max-size datagram plus address.
 632  */
 633 #ifndef PIPSIZ
 634 #define PIPSIZ  8192
 635 #endif
 636 static u_long   unpst_sendspace = PIPSIZ;
 637 static u_long   unpst_recvspace = PIPSIZ;
 638 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
 639 static u_long   unpdg_recvspace = 4*1024;
 640
 641 static int      unp_rights;                     /* file descriptors in flight */
 642 static int      unp_disposed;                   /* discarded file descriptors */
 643
 644 SYSCTL_DECL(_net_local_stream);
 645 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 646    &unpst_sendspace, 0, "");
 647 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 648    &unpst_recvspace, 0, "");
 649 SYSCTL_DECL(_net_local_dgram);
 650 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 651    &unpdg_sendspace, 0, "");
 652 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 653    &unpdg_recvspace, 0, "");
 654 SYSCTL_DECL(_net_local);
 655 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
 656
 657 /*
 658  * Returns:     0                       Success
 659  *              ENOBUFS
 660  *      soreserve:ENOBUFS
 661  */
 662 static int
 663 unp_attach(struct socket *so)
 664 {
 665         struct unpcb *unp;
 666         int error = 0;
 667
 668         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 669                 switch (so->so_type) {
 670
 671                 case SOCK_STREAM:
 672                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
 673                         break;
 674
 675                 case SOCK_DGRAM:
 676                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
 677                         break;
 678
 679                 default:
 680                         panic("unp_attach");
 681                 }
 682                 if (error)
 683                         return (error);
 684         }
 685         unp = (struct unpcb *)zalloc(unp_zone);
 686         if (unp == NULL)
 687                 return (ENOBUFS);
 688         bzero(unp, sizeof (*unp));
 689         lck_rw_lock_exclusive(unp_list_mtx);
 690         LIST_INIT(&unp->unp_refs);
 691         unp->unp_socket = so;
 692         unp->unp_gencnt = ++unp_gencnt;
 693         unp_count++;
 694         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
 695             &unp_dhead : &unp_shead, unp, unp_link);
 696         so->so_pcb = (caddr_t)unp;
 697         /*
 698          * Mark AF_UNIX socket buffers accordingly so that:
 699          *
 700          * a. In the SOCK_STREAM case, socket buffer append won't fail due to
 701          *    the lack of space; this essentially loosens the sbspace() check,
 702          *    since there is disconnect between sosend() and uipc_send() with
 703          *    respect to flow control that might result in our dropping the
 704          *    data in uipc_send().  By setting this, we allow for slightly
 705          *    more records to be appended to the receiving socket to avoid
 706          *    losing data (which we can't afford in the SOCK_STREAM case).
 707          *    Flow control still takes place since we adjust the sender's
 708          *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
 709          *    case and append would still fail when the queue overflows.
 710          *
 711          * b. In the presence of control messages containing internalized
 712          *    file descriptors, the append routines will not free them since
 713          *    we'd need to undo the work first via unp_dispose().
 714          */
 715         so->so_rcv.sb_flags |= SB_UNIX;
 716         so->so_snd.sb_flags |= SB_UNIX;
 717         lck_rw_done(unp_list_mtx);
 718         return (0);
 719 }
 720
 721 static void
 722 unp_detach(struct unpcb *unp)
 723 {
 724         lck_rw_lock_exclusive(unp_list_mtx);
 725         LIST_REMOVE(unp, unp_link);
 726         unp->unp_gencnt = ++unp_gencnt;
 727         lck_rw_done(unp_list_mtx);
 728         --unp_count;
 729         if (unp->unp_vnode) {
 730                 struct vnode *tvp = unp->unp_vnode;
 731                 unp->unp_vnode->v_socket = NULL;
 732                 unp->unp_vnode = NULL;
 733                 vnode_rele(tvp);                /* drop the usecount */
 734         }
 735         if (unp->unp_conn)
 736                 unp_disconnect(unp);
 737         while (unp->unp_refs.lh_first)
 738                 unp_drop(unp->unp_refs.lh_first, ECONNRESET);
 739         soisdisconnected(unp->unp_socket);
 740         /* makes sure we're getting dealloced */
 741         unp->unp_socket->so_flags |= SOF_PCBCLEARING;
 742         unp->unp_socket->so_pcb = NULL;
 743         if (unp_rights) {
 744                 /*
 745                  * Normally the receive buffer is flushed later,
 746                  * in sofree, but if our receive buffer holds references
 747                  * to descriptors that are now garbage, we will dispose
 748                  * of those descriptor references after the garbage collector
 749                  * gets them (resulting in a "panic: closef: count < 0").
 750                  */
 751                 sorflush(unp->unp_socket);
 752
 753                 /* Per domain mutex deadlock avoidance */
 754                 socket_unlock(unp->unp_socket, 0);
 755                 unp_gc();
 756                 socket_lock(unp->unp_socket, 0);
 757         }
 758         if (unp->unp_addr)
 759                 FREE(unp->unp_addr, M_SONAME);
 760         zfree(unp_zone, unp);
 761 }
 762
 763 /*
 764  * Returns:     0                       Success
 765  *              EAFNOSUPPORT
 766  *              EINVAL
 767  *              EADDRINUSE
 768  *              namei:???               [anything namei can return]
 769  *              vnode_authorize:???     [anything vnode_authorize can return]
 770  *
 771  * Notes:       p at this point is the current process, as this function is
 772  *              only called by sobind().
 773  */
 774 static int
 775 unp_bind(
 776         struct unpcb *unp,
 777         struct sockaddr *nam,
 778         proc_t p)
 779 {
 780         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 781         struct vnode *vp, *dvp;
 782         struct vnode_attr va;
 783         vfs_context_t ctx = vfs_context_current();
 784         int error, namelen;
 785         struct nameidata nd;
 786         char buf[SOCK_MAXADDRLEN];
 787
 788         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 789                 return (EAFNOSUPPORT);
 790         }
 791
 792         if (unp->unp_vnode != NULL)
 793                 return (EINVAL);
 794         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 795         if (namelen <= 0)
 796                 return (EINVAL);
 797
 798         strlcpy(buf, soun->sun_path, namelen+1);
 799         NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE32,
 800             CAST_USER_ADDR_T(buf), ctx);
 801         /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 802         error = namei(&nd);
 803         if (error) {
 804                 return (error);
 805         }
 806         dvp = nd.ni_dvp;
 807         vp = nd.ni_vp;
 808
 809         if (vp != NULL) {
 810                 /*
 811                  * need to do this before the vnode_put of dvp
 812                  * since we may have to release an fs_nodelock
 813                  */
 814                 nameidone(&nd);
 815
 816                 vnode_put(dvp);
 817                 vnode_put(vp);
 818
 819                 return (EADDRINUSE);
 820         }
 821
 822         VATTR_INIT(&va);
 823         VATTR_SET(&va, va_type, VSOCK);
 824         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
 825
 826 #if CONFIG_MACF_SOCKET
 827         /*
 828          * This is #if MAC_SOCKET, because it affects the connection rate
 829          * of Unix domain dockets that is critical for server performance
 830          */
 831         error = mac_vnode_check_create(ctx,
 832             nd.ni_dvp, &nd.ni_cnd, &va);
 833
 834         if (error == 0)
 835 #endif /* MAC_SOCKET */
 836         /* authorize before creating */
 837         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
 838
 839         if (!error) {
 840                 /* create the socket */
 841                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
 842         }
 843
 844         nameidone(&nd);
 845         vnode_put(dvp);
 846
 847         if (error) {
 848                 return (error);
 849         }
 850         vnode_ref(vp);  /* gain a longterm reference */
 851         vp->v_socket = unp->unp_socket;
 852         unp->unp_vnode = vp;
 853         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
 854         vnode_put(vp);          /* drop the iocount */
 855
 856         return (0);
 857 }
 858
 859
 860 /*
 861  * Returns:     0                       Success
 862  *              EAFNOSUPPORT            Address family not supported
 863  *              EINVAL                  Invalid argument
 864  *              ENOTSOCK                Not a socket
 865  *              ECONNREFUSED            Connection refused
 866  *              EPROTOTYPE              Protocol wrong type for socket
 867  *              EISCONN                 Socket is connected
 868  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 869  *      unp_connect2:EINVAL             Invalid argument
 870  *      namei:???                       [anything namei can return]
 871  *      vnode_authorize:????            [anything vnode_authorize can return]
 872  *
 873  * Notes:       p at this point is the current process, as this function is
 874  *              only called by sosend(), sendfile(), and soconnectlock().
 875  */
 876 static int
 877 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 878 {
 879         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 880         struct vnode *vp;
 881         struct socket *so2, *so3;
 882         struct unpcb *unp, *unp2, *unp3;
 883         vfs_context_t ctx = vfs_context_current();
 884         int error, len;
 885         struct nameidata nd;
 886         char buf[SOCK_MAXADDRLEN];
 887
 888         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 889                 return (EAFNOSUPPORT);
 890         }
 891
 892         so2 = so3 = NULL;
 893
 894         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 895         if (len <= 0)
 896                 return (EINVAL);
 897
 898         strlcpy(buf, soun->sun_path, len+1);
 899
 900         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32,
 901             CAST_USER_ADDR_T(buf), ctx);
 902         error = namei(&nd);
 903         if (error) {
 904                 return (error);
 905         }
 906         nameidone(&nd);
 907         vp = nd.ni_vp;
 908         if (vp->v_type != VSOCK) {
 909                 error = ENOTSOCK;
 910                 goto bad;
 911         }
 912
 913         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
 914         if (error)
 915                 goto bad;
 916         so2 = vp->v_socket;
 917         if (so2 == 0 || so2->so_pcb == NULL) {
 918                 error = ECONNREFUSED;
 919                 goto bad;
 920         }
 921
 922         /* make sure the socket can't go away while we're connecting */
 923         so2->so_usecount++;
 924
 925         if (so->so_type != so2->so_type) {
 926                 error = EPROTOTYPE;
 927                 goto bad;
 928         }
 929
 930         /*
 931          * Check if socket was connected while we were trying to
 932          * acquire the funnel.
 933          * XXX - probably shouldn't return an error for SOCK_DGRAM
 934          */
 935         if ((so->so_state & SS_ISCONNECTED) != 0) {
 936                 error = EISCONN;
 937                 goto bad;
 938         }
 939
 940         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 941                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
 942                     (so3 = sonewconn(so2, 0, nam)) == 0) {
 943                         error = ECONNREFUSED;
 944                         goto bad;
 945                 }
 946                 unp = sotounpcb(so);
 947                 unp2 = sotounpcb(so2);
 948                 unp3 = sotounpcb(so3);
 949                 if (unp2->unp_addr)
 950                         unp3->unp_addr = (struct sockaddr_un *)
 951                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
 952
 953                 /*
 954                  * unp_peercred management:
 955                  *
 956                  * The connecter's (client's) credentials are copied
 957                  * from its process structure at the time of connect()
 958                  * (which is now).
 959                  */
 960                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
 961                 unp3->unp_flags |= UNP_HAVEPC;
 962                 /*
 963                  * The receiver's (server's) credentials are copied
 964                  * from the unp_peercred member of socket on which the
 965                  * former called listen(); unp_listen() cached that
 966                  * process's credentials at that time so we can use
 967                  * them now.
 968                  */
 969                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 970                     ("unp_connect: listener without cached peercred"));
 971                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 972                     sizeof (unp->unp_peercred));
 973                 unp->unp_flags |= UNP_HAVEPC;
 974
 975 #if CONFIG_MACF_SOCKET
 976                 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
 977                 mac_socketpeer_label_associate_socket(so, so3);
 978                 mac_socketpeer_label_associate_socket(so3, so);
 979                 /* XXXMAC: SOCK_UNLOCK(so); */
 980 #endif /* MAC_SOCKET */
 981                 so2->so_usecount--; /* drop reference taken on so2 */
 982                 so2 = so3;
 983                 so3->so_usecount++; /* make sure we keep it around */
 984         }
 985         error = unp_connect2(so, so2);
 986 bad:
 987         if (so2 != NULL)
 988                 so2->so_usecount--; /* release count on socket */
 989         vnode_put(vp);
 990         return (error);
 991 }
 992
 993 /*
 994  * Returns:     0                       Success
 995  *              EPROTOTYPE              Protocol wrong type for socket
 996  *              EINVAL                  Invalid argument
 997  */
 998 int
 999 unp_connect2(struct socket *so, struct socket *so2)
1000 {
1001         struct unpcb *unp = sotounpcb(so);
1002         struct unpcb *unp2;
1003
1004         if (so2->so_type != so->so_type)
1005                 return (EPROTOTYPE);
1006         unp2 = sotounpcb(so2);
1007
1008         /* Verify both sockets are still opened */
1009         if (unp == 0 || unp2 == 0)
1010                 return (EINVAL);
1011
1012         unp->unp_conn = unp2;
1013         switch (so->so_type) {
1014
1015         case SOCK_DGRAM:
1016                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1017                 soisconnected(so);
1018                 break;
1019
1020         case SOCK_STREAM:
1021                 /* This takes care of socketpair */
1022                 if (!(unp->unp_flags & UNP_HAVEPC) &&
1023                     !(unp2->unp_flags & UNP_HAVEPC)) {
1024                         cru2x(kauth_cred_get(), &unp->unp_peercred);
1025                         unp->unp_flags |= UNP_HAVEPC;
1026
1027                         cru2x(kauth_cred_get(), &unp2->unp_peercred);
1028                         unp2->unp_flags |= UNP_HAVEPC;
1029                 }
1030                 unp2->unp_conn = unp;
1031                 soisconnected(so);
1032                 soisconnected(so2);
1033                 break;
1034
1035         default:
1036                 panic("unp_connect2");
1037         }
1038         return (0);
1039 }
1040
1041 static void
1042 unp_disconnect(struct unpcb *unp)
1043 {
1044         struct unpcb *unp2 = unp->unp_conn;
1045
1046         if (unp2 == 0)
1047                 return;
1048         unp->unp_conn = NULL;
1049         switch (unp->unp_socket->so_type) {
1050
1051         case SOCK_DGRAM:
1052                 lck_rw_lock_exclusive(unp_list_mtx);
1053                 LIST_REMOVE(unp, unp_reflink);
1054                 lck_rw_done(unp_list_mtx);
1055                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1056                 break;
1057
1058         case SOCK_STREAM:
1059                 soisdisconnected(unp->unp_socket);
1060                 unp2->unp_conn = NULL;
1061                 soisdisconnected(unp2->unp_socket);
1062                 break;
1063         }
1064 }
1065
1066 #ifdef notdef
/* Abort a PCB by detaching it (compiled only when 'notdef' is defined). */
void
unp_abort(struct unpcb *unp)
{
	unp_detach(unp);
}
1073 #endif
1074
1075 static int
1076 unp_pcblist SYSCTL_HANDLER_ARGS
1077 {
1078 #pragma unused(oidp,arg2)
1079         int error, i, n;
1080         struct unpcb *unp, **unp_list;
1081         unp_gen_t gencnt;
1082         struct xunpgen xug;
1083         struct unp_head *head;
1084
1085         lck_rw_lock_shared(unp_list_mtx);
1086         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1087
1088         /*
1089          * The process of preparing the PCB list is too time-consuming and
1090          * resource-intensive to repeat twice on every request.
1091          */
1092         if (req->oldptr == USER_ADDR_NULL) {
1093                 n = unp_count;
1094                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1095                     sizeof (struct xunpcb);
1096                 lck_rw_done(unp_list_mtx);
1097                 return (0);
1098         }
1099
1100         if (req->newptr != USER_ADDR_NULL) {
1101                 lck_rw_done(unp_list_mtx);
1102                 return (EPERM);
1103         }
1104
1105         /*
1106          * OK, now we're committed to doing something.
1107          */
1108         gencnt = unp_gencnt;
1109         n = unp_count;
1110
1111         bzero(&xug, sizeof (xug));
1112         xug.xug_len = sizeof (xug);
1113         xug.xug_count = n;
1114         xug.xug_gen = gencnt;
1115         xug.xug_sogen = so_gencnt;
1116         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1117         if (error) {
1118                 lck_rw_done(unp_list_mtx);
1119                 return (error);
1120         }
1121
1122         /*
1123          * We are done if there is no pcb
1124          */
1125         if (n == 0)  {
1126                 lck_rw_done(unp_list_mtx);
1127                 return (0);
1128         }
1129
1130         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1131             M_TEMP, M_WAITOK);
1132         if (unp_list == 0) {
1133                 lck_rw_done(unp_list_mtx);
1134                 return (ENOMEM);
1135         }
1136
1137         for (unp = head->lh_first, i = 0; unp && i < n;
1138             unp = unp->unp_link.le_next) {
1139                 if (unp->unp_gencnt <= gencnt)
1140                         unp_list[i++] = unp;
1141         }
1142         n = i;                  /* in case we lost some during malloc */
1143
1144         error = 0;
1145         for (i = 0; i < n; i++) {
1146                 unp = unp_list[i];
1147                 if (unp->unp_gencnt <= gencnt) {
1148                         struct xunpcb xu;
1149
1150                         bzero(&xu, sizeof (xu));
1151                         xu.xu_len = sizeof (xu);
1152                         xu.xu_unpp = (struct  unpcb_compat *)unp;
1153                         /*
1154                          * XXX - need more locking here to protect against
1155                          * connect/disconnect races for SMP.
1156                          */
1157                         if (unp->unp_addr)
1158                                 bcopy(unp->unp_addr, &xu.xu_addr,
1159                                     unp->unp_addr->sun_len);
1160                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1161                                 bcopy(unp->unp_conn->unp_addr,
1162                                     &xu.xu_caddr,
1163                                     unp->unp_conn->unp_addr->sun_len);
1164                         bcopy(unp, &xu.xu_unp, sizeof (xu.xu_unp));
1165                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
1166                         error = SYSCTL_OUT(req, &xu, sizeof (xu));
1167                 }
1168         }
1169         if (!error) {
1170                 /*
1171                  * Give the user an updated idea of our state.
1172                  * If the generation differs from what we told
1173                  * her before, she knows that something happened
1174                  * while we were processing this request, and it
1175                  * might be necessary to retry.
1176                  */
1177                 bzero(&xug, sizeof (xug));
1178                 xug.xug_len = sizeof (xug);
1179                 xug.xug_gen = unp_gencnt;
1180                 xug.xug_sogen = so_gencnt;
1181                 xug.xug_count = unp_count;
1182                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1183         }
1184         FREE(unp_list, M_TEMP);
1185         lck_rw_done(unp_list_mtx);
1186         return (error);
1187 }
1188
1189 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
1190             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1191             "List of active local datagram sockets");
1192 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
1193             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1194             "List of active local stream sockets");
1195
1196 static void
1197 unp_shutdown(struct unpcb *unp)
1198 {
1199         struct socket *so;
1200
1201         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
1202             (so = unp->unp_conn->unp_socket))
1203                 socantrcvmore(so);
1204 }
1205
1206 static void
1207 unp_drop(struct unpcb *unp, int errno)
1208 {
1209         struct socket *so = unp->unp_socket;
1210
1211         so->so_error = errno;
1212         unp_disconnect(unp);
1213 }
1214
1215 #ifdef notdef
/* Intentionally empty (compiled only when 'notdef' is defined). */
void
unp_drain()
{
}
1221 #endif
1222
1223 /*
1224  * Returns:     0                       Success
1225  *              EMSGSIZE                The new fd's will not fit
1226  *              ENOBUFS                 Cannot alloc struct fileproc
1227  */
1228 int
1229 unp_externalize(struct mbuf *rights)
1230 {
1231         proc_t p = current_proc();              /* XXX */
1232         int i;
1233         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1234         struct fileglob **rp = (struct fileglob **)(cm + 1);
1235         struct fileproc *fp;
1236         struct fileglob *fg;
1237         int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1238         int f;
1239
1240         proc_fdlock(p);
1241
1242         /*
1243          * if the new FD's will not fit, then we free them all
1244          */
1245         if (!fdavail(p, newfds)) {
1246                 for (i = 0; i < newfds; i++) {
1247                         fg = *rp;
1248                         unp_discard_fdlocked(fg, p);
1249                         *rp++ = NULL;
1250                 }
1251                 proc_fdunlock(p);
1252
1253                 return (EMSGSIZE);
1254         }
1255         /*
1256          * now change each pointer to an fd in the global table to
1257          * an integer that is the index to the local fd table entry
1258          * that we set up to point to the global one we are transferring.
1259          * XXX (1) this assumes a pointer and int are the same size...!
1260          * XXX (2) allocation failures should be non-fatal
1261          */
1262         for (i = 0; i < newfds; i++) {
1263 #if CONFIG_MACF_SOCKET
1264                 /*
1265                  * If receive access is denied, don't pass along
1266                  * and error message, just discard the descriptor.
1267                  */
1268                 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1269                         fg = *rp;
1270                         *rp++ = 0;
1271                         unp_discard_fdlocked(fg, p);
1272                         continue;
1273                 }
1274 #endif
1275                 if (fdalloc(p, 0, &f))
1276                         panic("unp_externalize:fdalloc");
1277                 fg = *rp;
1278                 MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc),
1279                     M_FILEPROC, M_WAITOK);
1280                 if (fp == NULL)
1281                         panic("unp_externalize: MALLOC_ZONE");
1282                 bzero(fp, sizeof (struct fileproc));
1283                 fp->f_iocount = 0;
1284                 fp->f_fglob = fg;
1285                 fg_removeuipc(fg);
1286                 procfdtbl_releasefd(p, f, fp);
1287                 (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights);
1288                 *(int *)rp++ = f;
1289         }
1290         proc_fdunlock(p);
1291
1292         return (0);
1293 }
1294
1295 void
1296 unp_init(void)
1297 {
1298         unp_zone = zinit(sizeof (struct unpcb),
1299             (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1300
1301         if (unp_zone == 0)
1302                 panic("unp_init");
1303         LIST_INIT(&unp_dhead);
1304         LIST_INIT(&unp_shead);
1305
1306         /*
1307          * allocate lock group attribute and group for udp pcb mutexes
1308          */
1309         unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1310
1311         unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1312
1313         unp_mtx_attr = lck_attr_alloc_init();
1314
1315         if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1316             unp_mtx_attr)) == NULL)
1317                 return; /* pretty much dead if this fails... */
1318
1319 }
1320
/* Smaller of two values; note that each argument may be evaluated twice. */
#ifndef MIN
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#endif
1324
1325 /*
1326  * Returns:     0                       Success
1327  *              EINVAL
1328  *      fdgetf_noref:EBADF
1329  */
1330 static int
1331 unp_internalize(struct mbuf *control, proc_t p)
1332 {
1333         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1334         struct fileglob **rp;
1335         struct fileproc *fp;
1336         int i, error;
1337         int oldfds;
1338
1339         /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1340         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1341             (unsigned long)cm->cmsg_len != (unsigned long)control->m_len) {
1342                 return (EINVAL);
1343         }
1344         oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1345
1346         proc_fdlock(p);
1347         rp = (struct fileglob **)(cm + 1);
1348
1349         for (i = 0; i < oldfds; i++) {
1350                 if ((error = fdgetf_noref(p, *(int *)rp++, NULL)) != 0) {
1351                         proc_fdunlock(p);
1352                         return (error);
1353                 }
1354         }
1355         rp = (struct fileglob **)(cm + 1);
1356
1357         for (i = 0; i < oldfds; i++) {
1358                 (void) fdgetf_noref(p, *(int *)rp, &fp);
1359                 fg_insertuipc(fp->f_fglob);
1360                 *rp++ = fp->f_fglob;
1361                 (void) OSAddAtomic(1, (volatile SInt32 *)&unp_rights);
1362         }
1363         proc_fdunlock(p);
1364
1365         return (0);
1366 }
1367
1368 static int      unp_defer, unp_gcing, unp_gcwait;
1369 static thread_t unp_gcthread = NULL;
1370
1371 /* always called under uipc_lock */
1372 void
1373 unp_gc_wait(void)
1374 {
1375         if (unp_gcthread == current_thread())
1376                 return;
1377
1378         while (unp_gcing != 0) {
1379                 unp_gcwait = 1;
1380                 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
1381         }
1382 }
1383
1384
1385 __private_extern__ void
1386 unp_gc(void)
1387 {
1388         struct fileglob *fg, *nextfg;
1389         struct socket *so;
1390         static struct fileglob **extra_ref;
1391         struct fileglob **fpp;
1392         int nunref, i;
1393         int need_gcwakeup = 0;
1394
1395         lck_mtx_lock(uipc_lock);
1396         if (unp_gcing) {
1397                 lck_mtx_unlock(uipc_lock);
1398                 return;
1399         }
1400         unp_gcing = 1;
1401         unp_defer = 0;
1402         unp_gcthread = current_thread();
1403         lck_mtx_unlock(uipc_lock);
1404         /*
1405          * before going through all this, set all FDs to
1406          * be NOT defered and NOT externally accessible
1407          */
1408         for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
1409                 lck_mtx_lock(&fg->fg_lock);
1410                 fg->fg_flag &= ~(FMARK|FDEFER);
1411                 lck_mtx_unlock(&fg->fg_lock);
1412         }
1413         do {
1414                 for (fg = fmsghead.lh_first; fg != 0;
1415                     fg = fg->f_msglist.le_next) {
1416                         lck_mtx_lock(&fg->fg_lock);
1417                         /*
1418                          * If the file is not open, skip it
1419                          */
1420                         if (fg->fg_count == 0) {
1421                                 lck_mtx_unlock(&fg->fg_lock);
1422                                 continue;
1423                         }
1424                         /*
1425                          * If we already marked it as 'defer'  in a
1426                          * previous pass, then try process it this time
1427                          * and un-mark it
1428                          */
1429                         if (fg->fg_flag & FDEFER) {
1430                                 fg->fg_flag &= ~FDEFER;
1431                                 unp_defer--;
1432                         } else {
1433                                 /*
1434                                  * if it's not defered, then check if it's
1435                                  * already marked.. if so skip it
1436                                  */
1437                                 if (fg->fg_flag & FMARK) {
1438                                         lck_mtx_unlock(&fg->fg_lock);
1439                                         continue;
1440                                 }
1441                                 /*
1442                                  * If all references are from messages
1443                                  * in transit, then skip it. it's not
1444                                  * externally accessible.
1445                                  */
1446                                 if (fg->fg_count == fg->fg_msgcount) {
1447                                         lck_mtx_unlock(&fg->fg_lock);
1448                                         continue;
1449                                 }
1450                                 /*
1451                                  * If it got this far then it must be
1452                                  * externally accessible.
1453                                  */
1454                                 fg->fg_flag |= FMARK;
1455                         }
1456                         /*
1457                          * either it was defered, or it is externally
1458                          * accessible and not already marked so.
1459                          * Now check if it is possibly one of OUR sockets.
1460                          */
1461                         if (fg->fg_type != DTYPE_SOCKET ||
1462                             (so = (struct socket *)fg->fg_data) == 0) {
1463                                 lck_mtx_unlock(&fg->fg_lock);
1464                                 continue;
1465                         }
1466                         if (so->so_proto->pr_domain != &localdomain ||
1467                             (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
1468                                 lck_mtx_unlock(&fg->fg_lock);
1469                                 continue;
1470                         }
1471 #ifdef notdef
1472                         /*
1473                          * if this code is enabled need to run
1474                          * under network funnel
1475                          */
1476                         if (so->so_rcv.sb_flags & SB_LOCK) {
1477                                 /*
1478                                  * This is problematical; it's not clear
1479                                  * we need to wait for the sockbuf to be
1480                                  * unlocked (on a uniprocessor, at least),
1481                                  * and it's also not clear what to do
1482                                  * if sbwait returns an error due to receipt
1483                                  * of a signal.  If sbwait does return
1484                                  * an error, we'll go into an infinite
1485                                  * loop.  Delete all of this for now.
1486                                  */
1487                                 (void) sbwait(&so->so_rcv);
1488                                 goto restart;
1489                         }
1490 #endif
1491                         /*
1492                          * So, Ok, it's one of our sockets and it IS externally
1493                          * accessible (or was defered). Now we look
1494                          * to see if we hold any file descriptors in its
1495                          * message buffers. Follow those links and mark them
1496                          * as accessible too.
1497                          *
1498                          * In case a file is passed onto itself we need to
1499                          * release the file lock.
1500                          */
1501                         lck_mtx_unlock(&fg->fg_lock);
1502
1503                         unp_scan(so->so_rcv.sb_mb, unp_mark);
1504                 }
1505         } while (unp_defer);
1506         /*
1507          * We grab an extra reference to each of the file table entries
1508          * that are not otherwise accessible and then free the rights
1509          * that are stored in messages on them.
1510          *
1511          * The bug in the orginal code is a little tricky, so I'll describe
1512          * what's wrong with it here.
1513          *
1514          * It is incorrect to simply unp_discard each entry for f_msgcount
1515          * times -- consider the case of sockets A and B that contain
1516          * references to each other.  On a last close of some other socket,
1517          * we trigger a gc since the number of outstanding rights (unp_rights)
1518          * is non-zero.  If during the sweep phase the gc code un_discards,
1519          * we end up doing a (full) closef on the descriptor.  A closef on A
1520          * results in the following chain.  Closef calls soo_close, which
1521          * calls soclose.   Soclose calls first (through the switch
1522          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
1523          * returns because the previous instance had set unp_gcing, and
1524          * we return all the way back to soclose, which marks the socket
1525          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
1526          * to free up the rights that are queued in messages on the socket A,
1527          * i.e., the reference on B.  The sorflush calls via the dom_dispose
1528          * switch unp_dispose, which unp_scans with unp_discard.  This second
1529          * instance of unp_discard just calls closef on B.
1530          *
1531          * Well, a similar chain occurs on B, resulting in a sorflush on B,
1532          * which results in another closef on A.  Unfortunately, A is already
1533          * being closed, and the descriptor has already been marked with
1534          * SS_NOFDREF, and soclose panics at this point.
1535          *
1536          * Here, we first take an extra reference to each inaccessible
1537          * descriptor.  Then, we call sorflush ourself, since we know
1538          * it is a Unix domain socket anyhow.  After we destroy all the
1539          * rights carried in messages, we do a last closef to get rid
1540          * of our extra reference.  This is the last close, and the
1541          * unp_detach etc will shut down the socket.
1542          *
1543          * 91/09/19, bsy@cs.cmu.edu
1544          */
1545         extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
1546             M_FILEGLOB, M_WAITOK);
1547         for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
1548             fg = nextfg) {
1549                 lck_mtx_lock(&fg->fg_lock);
1550
1551                 nextfg = fg->f_msglist.le_next;
1552                 /*
1553                  * If it's not open, skip it
1554                  */
1555                 if (fg->fg_count == 0) {
1556                         lck_mtx_unlock(&fg->fg_lock);
1557                         continue;
1558                 }
1559                 /*
1560                  * If all refs are from msgs, and it's not marked accessible
1561                  * then it must be referenced from some unreachable cycle
1562                  * of (shut-down) FDs, so include it in our
1563                  * list of FDs to remove
1564                  */
1565                 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
1566                         fg->fg_count++;
1567                         *fpp++ = fg;
1568                         nunref++;
1569                 }
1570                 lck_mtx_unlock(&fg->fg_lock);
1571         }
1572         /*
1573          * for each FD on our hit list, do the following two things
1574          */
1575         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1576                 struct fileglob *tfg;
1577
1578                 tfg = *fpp;
1579
1580                 if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
1581                         so = (struct socket *)(tfg->fg_data);
1582
1583                         socket_lock(so, 0);
1584
1585                         sorflush(so);
1586
1587                         socket_unlock(so, 0);
1588                 }
1589         }
1590         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
1591                 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
1592
1593         lck_mtx_lock(uipc_lock);
1594         unp_gcing = 0;
1595         unp_gcthread = NULL;
1596
1597         if (unp_gcwait != 0) {
1598                 unp_gcwait = 0;
1599                 need_gcwakeup = 1;
1600         }
1601         lck_mtx_unlock(uipc_lock);
1602
1603         if (need_gcwakeup != 0)
1604                 wakeup(&unp_gcing);
1605         FREE((caddr_t)extra_ref, M_FILEGLOB);
1606 }
1607
1608 void
1609 unp_dispose(struct mbuf *m)
1610 {
1611         if (m) {
1612                 unp_scan(m, unp_discard);
1613         }
1614 }
1615
1616 /*
1617  * Returns:     0                       Success
1618  */
1619 static int
1620 unp_listen(struct unpcb *unp, proc_t p)
1621 {
1622         kauth_cred_t safecred = kauth_cred_proc_ref(p);
1623         cru2x(safecred, &unp->unp_peercred);
1624         kauth_cred_unref(&safecred);
1625         unp->unp_flags |= UNP_HAVEPCCACHED;
1626         return (0);
1627 }
1628
1629 /* should run under kernel funnel */
1630 static void
1631 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
1632 {
1633         struct mbuf *m;
1634         struct fileglob **rp;
1635         struct cmsghdr *cm;
1636         int i;
1637         int qfds;
1638
1639         while (m0) {
1640                 for (m = m0; m; m = m->m_next)
1641                         if (m->m_type == MT_CONTROL &&
1642                             (size_t)m->m_len >= sizeof (*cm)) {
1643                                 cm = mtod(m, struct cmsghdr *);
1644                                 if (cm->cmsg_level != SOL_SOCKET ||
1645                                     cm->cmsg_type != SCM_RIGHTS)
1646                                         continue;
1647                                 qfds = (cm->cmsg_len - sizeof (*cm)) /
1648                                     sizeof (struct fileglob *);
1649                                 rp = (struct fileglob **)(cm + 1);
1650                                 for (i = 0; i < qfds; i++)
1651                                         (*op)(*rp++);
1652                                 break;          /* XXX, but saves time */
1653                         }
1654                 m0 = m0->m_act;
1655         }
1656 }
1657
1658 /* should run under kernel funnel */
1659 static void
1660 unp_mark(struct fileglob *fg)
1661 {
1662         lck_mtx_lock(&fg->fg_lock);
1663
1664         if (fg->fg_flag & FMARK) {
1665                 lck_mtx_unlock(&fg->fg_lock);
1666                 return;
1667         }
1668         fg->fg_flag |= (FMARK|FDEFER);
1669
1670         lck_mtx_unlock(&fg->fg_lock);
1671
1672         unp_defer++;
1673 }
1674
1675 /* should run under kernel funnel */
1676 static void
1677 unp_discard(struct fileglob *fg)
1678 {
1679         proc_t p = current_proc();              /* XXX */
1680
1681         (void) OSAddAtomic(1, (volatile SInt32 *)&unp_disposed);
1682
1683         proc_fdlock(p);
1684         unp_discard_fdlocked(fg, p);
1685         proc_fdunlock(p);
1686 }
1687 static void
1688 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
1689 {
1690         fg_removeuipc(fg);
1691
1692         (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights);
1693         (void) closef_locked((struct fileproc *)0, fg, p);
1694 }