bsd/kern/uipc_usrreq.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
  61  */
  62 /*
  63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  64  * support for mandatory and extensible security protections.  This notice
  65  * is included in support of clause 2.2 (b) of the Apple Public License,
  66  * Version 2.0.
  67  */
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/kernel.h>
  72 #include <sys/domain.h>
  73 #include <sys/fcntl.h>
  74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
  75 #include <sys/file_internal.h>
  76 #include <sys/filedesc.h>
  77 #include <sys/lock.h>
  78 #include <sys/mbuf.h>
  79 #include <sys/namei.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/protosw.h>
  83 #include <sys/socket.h>
  84 #include <sys/socketvar.h>
  85 #include <sys/stat.h>
  86 #include <sys/sysctl.h>
  87 #include <sys/un.h>
  88 #include <sys/unpcb.h>
  89 #include <sys/vnode_internal.h>
  90 #include <sys/kdebug.h>
  91
  92 #include <kern/zalloc.h>
  93 #include <kern/locks.h>
  94
  95 #if CONFIG_MACF_SOCKET
  96 #include <security/mac_framework.h>
  97 #endif /* MAC_SOCKET */
  98
/*
 * Shorthand accessors: x->f_msgcount etc. expand to the corresponding
 * fg_* member reached through x->f_fglob.
 */
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
/* Zone from which unp_attach() allocates unpcbs and unp_detach() frees them */
struct  zone *unp_zone;
/* Generation counter; bumped and stamped into the pcb on attach and detach */
static  unp_gen_t unp_gencnt;
/* Number of unpcbs currently attached (++ in unp_attach, -- in unp_detach) */
static  u_int unp_count;

/*
 * Lock plumbing.  Despite its "mtx" name, unp_list_mtx is a read/write
 * lock; unp_attach() and unp_detach() take it exclusive around their
 * updates of the pcb lists.
 */
static  lck_attr_t              *unp_mtx_attr;
static  lck_grp_t               *unp_mtx_grp;
static  lck_grp_attr_t          *unp_mtx_grp_attr;
static  lck_rw_t                *unp_list_mtx;

/* NOTE(review): defined elsewhere; not referenced by the handlers below */
extern lck_mtx_t *uipc_lock;
/* Lists of attached pcbs: unp_shead for stream, unp_dhead for datagram */
static  struct unp_head unp_shead, unp_dhead;
 115
 116 /*
 117  * Unix communications domain.
 118  *
 119  * TODO:
 120  *      SEQPACKET, RDM
 121  *      rethink name space problems
 122  *      need a proper out-of-band
 123  *      lock pushdown
 124  */
/*
 * Placeholder address duplicated and returned by the accept/peeraddr/sockaddr
 * handlers (and used as the datagram "from" address) when no bound name is
 * available.
 */
static struct   sockaddr sun_noname = { sizeof (sun_noname), AF_LOCAL, { 0 } };
/* Source of the fake inode numbers handed out by uipc_sense() */
static ino_t    unp_ino;                /* prototype for fake inode numbers */

/* Forward declarations of the file-local unp_* helpers */
static int      unp_attach(struct socket *);
static void     unp_detach(struct unpcb *);
static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
static void     unp_disconnect(struct unpcb *);
static void     unp_shutdown(struct unpcb *);
static void     unp_drop(struct unpcb *, int);
static void     unp_gc(void);
static void     unp_scan(struct mbuf *, void (*)(struct fileglob *));
static void     unp_mark(struct fileglob *);
static void     unp_discard(struct fileglob *);
static void     unp_discard_fdlocked(struct fileglob *, proc_t);
static int      unp_internalize(struct mbuf *, proc_t);
static int      unp_listen(struct unpcb *, proc_t);

/* TODO: this should be in header file */
extern int fdgetf_noref(proc_t, int, struct fileproc **);
 145
 146 static int
 147 uipc_abort(struct socket *so)
 148 {
 149         struct unpcb *unp = sotounpcb(so);
 150
 151         if (unp == 0)
 152                 return (EINVAL);
 153         unp_drop(unp, ECONNABORTED);
 154         unp_detach(unp);
 155         sofree(so);
 156         return (0);
 157 }
 158
 159 static int
 160 uipc_accept(struct socket *so, struct sockaddr **nam)
 161 {
 162         struct unpcb *unp = sotounpcb(so);
 163
 164         if (unp == 0)
 165                 return (EINVAL);
 166
 167         /*
 168          * Pass back name of connected socket,
 169          * if it was bound and we are still connected
 170          * (our peer may have closed already!).
 171          */
 172         if (unp->unp_conn && unp->unp_conn->unp_addr) {
 173                 *nam = dup_sockaddr((struct sockaddr *)
 174                     unp->unp_conn->unp_addr, 1);
 175         } else {
 176                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 177         }
 178         return (0);
 179 }
 180
 181 /*
 182  * Returns:     0                       Success
 183  *              EISCONN
 184  *      unp_attach:
 185  */
 186 static int
 187 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
 188 {
 189         struct unpcb *unp = sotounpcb(so);
 190
 191         if (unp != 0)
 192                 return (EISCONN);
 193         return (unp_attach(so));
 194 }
 195
 196 static int
 197 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
 198 {
 199         struct unpcb *unp = sotounpcb(so);
 200
 201         if (unp == 0)
 202                 return (EINVAL);
 203
 204         return (unp_bind(unp, nam, p));
 205 }
 206
 207 /*
 208  * Returns:     0                       Success
 209  *              EINVAL
 210  *      unp_connect:???                 [See elsewhere in this file]
 211  */
 212 static int
 213 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
 214 {
 215         struct unpcb *unp = sotounpcb(so);
 216
 217         if (unp == 0)
 218                 return (EINVAL);
 219         return (unp_connect(so, nam, p));
 220 }
 221
 222 /*
 223  * Returns:     0                       Success
 224  *              EINVAL
 225  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 226  *      unp_connect2:EINVAL             Invalid argument
 227  */
 228 static int
 229 uipc_connect2(struct socket *so1, struct socket *so2)
 230 {
 231         struct unpcb *unp = sotounpcb(so1);
 232
 233         if (unp == 0)
 234                 return (EINVAL);
 235
 236         return (unp_connect2(so1, so2));
 237 }
 238
/* pru_control is EOPNOTSUPP */
 240
 241 static int
 242 uipc_detach(struct socket *so)
 243 {
 244         struct unpcb *unp = sotounpcb(so);
 245
 246         if (unp == 0)
 247                 return (EINVAL);
 248
 249         unp_detach(unp);
 250         return (0);
 251 }
 252
 253 static int
 254 uipc_disconnect(struct socket *so)
 255 {
 256         struct unpcb *unp = sotounpcb(so);
 257
 258         if (unp == 0)
 259                 return (EINVAL);
 260         unp_disconnect(unp);
 261         return (0);
 262 }
 263
 264 /*
 265  * Returns:     0                       Success
 266  *              EINVAL
 267  */
 268 static int
 269 uipc_listen(struct socket *so, __unused proc_t p)
 270 {
 271         struct unpcb *unp = sotounpcb(so);
 272
 273         if (unp == 0 || unp->unp_vnode == 0)
 274                 return (EINVAL);
 275         return (unp_listen(unp, p));
 276 }
 277
 278 static int
 279 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 280 {
 281         struct unpcb *unp = sotounpcb(so);
 282
 283         if (unp == NULL)
 284                 return (EINVAL);
 285         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
 286                 *nam = dup_sockaddr((struct sockaddr *)
 287                     unp->unp_conn->unp_addr, 1);
 288         } else {
 289                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 290         }
 291         return (0);
 292 }
 293
 294 static int
 295 uipc_rcvd(struct socket *so, __unused int flags)
 296 {
 297         struct unpcb *unp = sotounpcb(so);
 298         struct socket *so2;
 299
 300         if (unp == 0)
 301                 return (EINVAL);
 302         switch (so->so_type) {
 303         case SOCK_DGRAM:
 304                 panic("uipc_rcvd DGRAM?");
 305                 /*NOTREACHED*/
 306
 307         case SOCK_STREAM:
 308 #define rcv (&so->so_rcv)
 309 #define snd (&so2->so_snd)
 310                 if (unp->unp_conn == 0)
 311                         break;
 312                 so2 = unp->unp_conn->unp_socket;
 313                 /*
 314                  * Adjust backpressure on sender
 315                  * and wakeup any waiting to write.
 316                  */
 317                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
 318                 unp->unp_mbcnt = rcv->sb_mbcnt;
 319                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
 320                 unp->unp_cc = rcv->sb_cc;
 321                 sowwakeup(so2);
 322 #undef snd
 323 #undef rcv
 324                 break;
 325
 326         default:
 327                 panic("uipc_rcvd unknown socktype");
 328         }
 329         return (0);
 330 }
 331
 332 /* pru_rcvoob is EOPNOTSUPP */
 333
/*
 * Send handler: deliver the data chain 'm' (and the optional 'control'
 * chain) from 'so' directly into the receive buffer of its peer socket.
 *
 * For SOCK_DGRAM, a destination 'nam' causes a temporary connect before
 * the append and a disconnect after it; without 'nam' the socket must
 * already be connected.  For SOCK_STREAM, an unconnected socket is
 * connected to 'nam' first, and after the append the sender's sb_mbmax
 * and sb_hiwat are reduced by what the receiver's buffer grew.
 *
 * Any of 'm' / 'control' still owned by this function when it finishes
 * is freed at the 'release' label; pointers that have been handed off
 * are set to NULL beforehand.
 *
 * Returns:     0                       Success
 *              EINVAL
 *              EOPNOTSUPP
 *              EPIPE
 *              ENOTCONN
 *              EISCONN
 *      unp_internalize:EINVAL
 *      unp_internalize:EBADF
 *      unp_connect:EAFNOSUPPORT        Address family not supported
 *      unp_connect:EINVAL              Invalid argument
 *      unp_connect:ENOTSOCK            Not a socket
 *      unp_connect:ECONNREFUSED        Connection refused
 *      unp_connect:EISCONN             Socket is connected
 *      unp_connect:EPROTOTYPE          Protocol wrong type for socket
 *      unp_connect:???
 *      sbappendaddr:ENOBUFS            [5th argument, contents modified]
 *      sbappendaddr:???                [whatever a filter author chooses]
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
        int error = 0;
        struct unpcb *unp = sotounpcb(so);
        struct socket *so2;

        if (unp == 0) {
                error = EINVAL;
                goto release;
        }
        /* Out-of-band data is not supported on this protocol */
        if (flags & PRUS_OOB) {
                error = EOPNOTSUPP;
                goto release;
        }

        if (control) {
                /* release global lock to avoid deadlock (4436174) */
                socket_unlock(so, 0);
                error = unp_internalize(control, p);
                socket_lock(so, 0);
                /*
                 * On failure 'control' goes straight to 'release', where it
                 * is only m_freem()'d, without a unp_dispose() call.
                 */
                if (error)
                        goto release;
        }

        switch (so->so_type) {
        case SOCK_DGRAM:
        {
                struct sockaddr *from;

                if (nam) {
                        /* An explicit destination is refused once connected */
                        if (unp->unp_conn) {
                                error = EISCONN;
                                break;
                        }
                        /* Temporary connection; undone after the append */
                        error = unp_connect(so, nam, p);
                        if (error)
                                break;
                } else {
                        if (unp->unp_conn == 0) {
                                error = ENOTCONN;
                                break;
                        }
                }
                so2 = unp->unp_conn->unp_socket;
                /* Source address reported to the receiver */
                if (unp->unp_addr)
                        from = (struct sockaddr *)unp->unp_addr;
                else
                        from = &sun_noname;
                /*
                 * sbappendaddr() will fail when the receiver runs out of
                 * space; in contrast to SOCK_STREAM, we will lose messages
                 * for the SOCK_DGRAM case when the receiver's queue overflows.
                 * SB_UNIX on the socket buffer implies that the callee will
                 * not free the control message, if any, because we would need
                 * to call unp_dispose() on it.
                 */
                if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
                        control = NULL;
                        sorwakeup(so2);
                } else if (control != NULL && error == 0) {
                        /* A socket filter took control; don't touch it */
                        control = NULL;
                }
                /* 'm' is no longer ours, whether or not the append worked */
                m = NULL;
                if (nam)
                        unp_disconnect(unp);
                break;
        }

        case SOCK_STREAM: {
                int didreceive = 0;
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
                /* Connect if not connected yet. */
                /*
                 * Note: A better implementation would complain
                 * if not equal to the peer's address.
                 */
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        if (nam) {
                                error = unp_connect(so, nam, p);
                                if (error)
                                        break;  /* XXX */
                        } else {
                                error = ENOTCONN;
                                break;
                        }
                }

                if (so->so_state & SS_CANTSENDMORE) {
                        error = EPIPE;
                        break;
                }
                if (unp->unp_conn == 0)
                        panic("uipc_send connected but no connection?");
                so2 = unp->unp_conn->unp_socket;
                /*
                 * Send to paired receive port, and then reduce send buffer
                 * hiwater marks to maintain backpressure.  Wake up readers.
                 * SB_UNIX flag will allow new record to be appended to the
                 * receiver's queue even when it is already full.  It is
                 * possible, however, that append might fail.  In that case,
                 * we will need to call unp_dispose() on the control message;
                 * the callee will not free it since SB_UNIX is set.
                 */
                didreceive = control ?
                    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

                /*
                 * Charge the sender for the growth of the receiver's buffer
                 * since the snapshot kept in the peer's pcb, then refresh
                 * that snapshot.
                 */
                snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
                snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
                unp->unp_conn->unp_cc = rcv->sb_cc;
                if (didreceive) {
                        control = NULL;
                        sorwakeup(so2);
                } else if (control != NULL && error == 0) {
                        /* A socket filter took control; don't touch it */
                        control = NULL;
                }
                /* 'm' is no longer ours, whether or not the append worked */
                m = NULL;
#undef snd
#undef rcv
                }
                break;

        default:
                panic("uipc_send unknown socktype");
        }

        /*
         * SEND_EOF is equivalent to a SEND followed by
         * a SHUTDOWN.
         */
        if (flags & PRUS_EOF) {
                socantsendmore(so);
                unp_shutdown(unp);
        }

        /*
         * 'control' was internalized above but never delivered: undo that
         * with unp_dispose() (socket lock dropped around the call) before
         * the chain is freed below.
         */
        if (control && error != 0) {
                socket_unlock(so, 0);
                unp_dispose(control);
                socket_lock(so, 0);
        }

release:
        /* Free whatever this function still owns */
        if (control)
                m_freem(control);
        if (m)
                m_freem(m);
        return (error);
}
 506
 507 static int
 508 uipc_sense(struct socket *so, void *ub, int isstat64)
 509 {
 510         struct unpcb *unp = sotounpcb(so);
 511         struct socket *so2;
 512         blksize_t blksize;
 513
 514         if (unp == 0)
 515                 return (EINVAL);
 516
 517         blksize = so->so_snd.sb_hiwat;
 518         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
 519                 so2 = unp->unp_conn->unp_socket;
 520                 blksize += so2->so_rcv.sb_cc;
 521         }
 522         if (unp->unp_ino == 0)
 523                 unp->unp_ino = unp_ino++;
 524
 525         if (isstat64 != 0) {
 526                 struct stat64  *sb64;
 527
 528                 sb64 = (struct stat64 *)ub;
 529                 sb64->st_blksize = blksize;
 530                 sb64->st_dev = NODEV;
 531                 sb64->st_ino = (ino64_t)unp->unp_ino;
 532         } else {
 533                 struct stat *sb;
 534
 535                 sb = (struct stat *)ub;
 536                 sb->st_blksize = blksize;
 537                 sb->st_dev = NODEV;
 538                 sb->st_ino = (ino_t)unp->unp_ino;
 539         }
 540
 541         return (0);
 542 }
 543
 544 /*
 545  * Returns:     0               Success
 546  *              EINVAL
 547  *
 548  * Notes:       This is not strictly correct, as unp_shutdown() also calls
 549  *              socantrcvmore().  These should maybe both be conditionalized
 550  *              on the 'how' argument in soshutdown() as called from the
 551  *              shutdown() system call.
 552  */
 553 static int
 554 uipc_shutdown(struct socket *so)
 555 {
 556         struct unpcb *unp = sotounpcb(so);
 557
 558         if (unp == 0)
 559                 return (EINVAL);
 560         socantsendmore(so);
 561         unp_shutdown(unp);
 562         return (0);
 563 }
 564
 565 /*
 566  * Returns:     0                       Success
 567  *              EINVAL                  Invalid argument
 568  */
 569 static int
 570 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 571 {
 572         struct unpcb *unp = sotounpcb(so);
 573
 574         if (unp == NULL)
 575                 return (EINVAL);
 576         if (unp->unp_addr != NULL) {
 577                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
 578         } else {
 579                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
 580         }
 581         return (0);
 582 }
 583
/*
 * User-request dispatch table for this protocol.  The initializer is
 * positional, so each entry must stay in the slot order declared by
 * struct pr_usrreqs (declared elsewhere -- verify against that header
 * before reordering).  The control, rcvoob and sopoll slots use the
 * generic *_notsupp handlers; sosend/soreceive are the generic socket
 * layer routines.
 */
struct pr_usrreqs uipc_usrreqs = {
        uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
        uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
        uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
        uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
        sosend, soreceive, pru_sopoll_notsupp
};
 591
 592 int
 593 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 594 {
 595         struct unpcb *unp = sotounpcb(so);
 596         int error;
 597
 598         switch (sopt->sopt_dir) {
 599         case SOPT_GET:
 600                 switch (sopt->sopt_name) {
 601                 case LOCAL_PEERCRED:
 602                         if (unp->unp_flags & UNP_HAVEPC) {
 603                                 error = sooptcopyout(sopt, &unp->unp_peercred,
 604                                     sizeof (unp->unp_peercred));
 605                         } else {
 606                                 if (so->so_type == SOCK_STREAM)
 607                                         error = ENOTCONN;
 608                                 else
 609                                         error = EINVAL;
 610                         }
 611                         break;
 612                 default:
 613                         error = EOPNOTSUPP;
 614                         break;
 615                 }
 616                 break;
 617         case SOPT_SET:
 618         default:
 619                 error = EOPNOTSUPP;
 620                 break;
 621         }
 622         return (error);
 623 }
 624
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * These values are what unp_attach() passes to soreserve() for a socket
 * whose buffer high-water marks are still zero.
 */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
/*
 * NOTE(review): the four tunables below are u_long but are exported with
 * SYSCTL_INT -- confirm the widths agree on every supported kernel
 * configuration (they would not if u_long is wider than int).
 */
static u_long   unpst_sendspace = PIPSIZ;
static u_long   unpst_recvspace = PIPSIZ;
static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
static u_long   unpdg_recvspace = 4*1024;

/* Non-zero unp_rights makes unp_detach() flush and run unp_gc() */
static int      unp_rights;                     /* file descriptors in flight */
static int      unp_disposed;                   /* discarded file descriptors */

/* Read/write stream tunables: sendspace and recvspace */
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
   &unpst_recvspace, 0, "");
/* Read/write datagram tunables: maxdgram (backed by unpdg_sendspace) and recvspace */
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
   &unpdg_recvspace, 0, "");
/* Read-only "inflight" node exposing unp_rights */
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
 656
 657 /*
 658  * Returns:     0                       Success
 659  *              ENOBUFS
 660  *      soreserve:ENOBUFS
 661  */
 662 static int
 663 unp_attach(struct socket *so)
 664 {
 665         struct unpcb *unp;
 666         int error = 0;
 667
 668         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 669                 switch (so->so_type) {
 670
 671                 case SOCK_STREAM:
 672                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
 673                         break;
 674
 675                 case SOCK_DGRAM:
 676                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
 677                         break;
 678
 679                 default:
 680                         panic("unp_attach");
 681                 }
 682                 if (error)
 683                         return (error);
 684         }
 685         unp = (struct unpcb *)zalloc(unp_zone);
 686         if (unp == NULL)
 687                 return (ENOBUFS);
 688         bzero(unp, sizeof (*unp));
 689         lck_rw_lock_exclusive(unp_list_mtx);
 690         LIST_INIT(&unp->unp_refs);
 691         unp->unp_socket = so;
 692         unp->unp_gencnt = ++unp_gencnt;
 693         unp_count++;
 694         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
 695             &unp_dhead : &unp_shead, unp, unp_link);
 696         so->so_pcb = (caddr_t)unp;
 697         /*
 698          * Mark AF_UNIX socket buffers accordingly so that:
 699          *
 700          * a. In the SOCK_STREAM case, socket buffer append won't fail due to
 701          *    the lack of space; this essentially loosens the sbspace() check,
 702          *    since there is disconnect between sosend() and uipc_send() with
 703          *    respect to flow control that might result in our dropping the
 704          *    data in uipc_send().  By setting this, we allow for slightly
 705          *    more records to be appended to the receiving socket to avoid
 706          *    losing data (which we can't afford in the SOCK_STREAM case).
 707          *    Flow control still takes place since we adjust the sender's
 708          *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
 709          *    case and append would still fail when the queue overflows.
 710          *
 711          * b. In the presence of control messages containing internalized
 712          *    file descriptors, the append routines will not free them since
 713          *    we'd need to undo the work first via unp_dispose().
 714          */
 715         so->so_rcv.sb_flags |= SB_UNIX;
 716         so->so_snd.sb_flags |= SB_UNIX;
 717         lck_rw_done(unp_list_mtx);
 718         return (0);
 719 }
 720
 721 static void
 722 unp_detach(struct unpcb *unp)
 723 {
 724         lck_rw_lock_exclusive(unp_list_mtx);
 725         LIST_REMOVE(unp, unp_link);
 726         unp->unp_gencnt = ++unp_gencnt;
 727         lck_rw_done(unp_list_mtx);
 728         --unp_count;
 729         if (unp->unp_vnode) {
 730                 struct vnode *tvp = unp->unp_vnode;
 731                 unp->unp_vnode->v_socket = NULL;
 732                 unp->unp_vnode = NULL;
 733                 vnode_rele(tvp);                /* drop the usecount */
 734         }
 735         if (unp->unp_conn)
 736                 unp_disconnect(unp);
 737         while (unp->unp_refs.lh_first)
 738                 unp_drop(unp->unp_refs.lh_first, ECONNRESET);
 739         soisdisconnected(unp->unp_socket);
 740         /* makes sure we're getting dealloced */
 741         unp->unp_socket->so_flags |= SOF_PCBCLEARING;
 742         unp->unp_socket->so_pcb = NULL;
 743         if (unp_rights) {
 744                 /*
 745                  * Normally the receive buffer is flushed later,
 746                  * in sofree, but if our receive buffer holds references
 747                  * to descriptors that are now garbage, we will dispose
 748                  * of those descriptor references after the garbage collector
 749                  * gets them (resulting in a "panic: closef: count < 0").
 750                  */
 751                 sorflush(unp->unp_socket);
 752                 unp_gc();
 753         }
 754         if (unp->unp_addr)
 755                 FREE(unp->unp_addr, M_SONAME);
 756         zfree(unp_zone, unp);
 757 }
 758
 759 /*
 760  * Returns:     0                       Success
 761  *              EAFNOSUPPORT
 762  *              EINVAL
 763  *              EADDRINUSE
 764  *              namei:???               [anything namei can return]
 765  *              vnode_authorize:???     [anything vnode_authorize can return]
 766  *
 767  * Notes:       p at this point is the current process, as this function is
 768  *              only called by sobind().
 769  */
 770 static int
 771 unp_bind(
 772         struct unpcb *unp,
 773         struct sockaddr *nam,
 774         proc_t p)
 775 {
 776         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 777         struct vnode *vp, *dvp;
 778         struct vnode_attr va;
 779         vfs_context_t ctx = vfs_context_current();
 780         int error, namelen;
 781         struct nameidata nd;
 782         char buf[SOCK_MAXADDRLEN];
 783
 784         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 785                 return (EAFNOSUPPORT);
 786         }
 787
 788         if (unp->unp_vnode != NULL)
 789                 return (EINVAL);
 790         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 791         if (namelen <= 0)
 792                 return (EINVAL);
 793
 794         strlcpy(buf, soun->sun_path, namelen+1);
 795         NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE32,
 796             CAST_USER_ADDR_T(buf), ctx);
 797         /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 798         error = namei(&nd);
 799         if (error) {
 800                 return (error);
 801         }
 802         dvp = nd.ni_dvp;
 803         vp = nd.ni_vp;
 804
 805         if (vp != NULL) {
 806                 /*
 807                  * need to do this before the vnode_put of dvp
 808                  * since we may have to release an fs_nodelock
 809                  */
 810                 nameidone(&nd);
 811
 812                 vnode_put(dvp);
 813                 vnode_put(vp);
 814
 815                 return (EADDRINUSE);
 816         }
 817
 818         VATTR_INIT(&va);
 819         VATTR_SET(&va, va_type, VSOCK);
 820         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));
 821
 822 #if CONFIG_MACF_SOCKET
 823         /*
 824          * This is #if MAC_SOCKET, because it affects the connection rate
 825          * of Unix domain dockets that is critical for server performance
 826          */
 827         error = mac_vnode_check_create(ctx,
 828             nd.ni_dvp, &nd.ni_cnd, &va);
 829
 830         if (error == 0)
 831 #endif /* MAC_SOCKET */
 832         /* authorize before creating */
 833         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
 834
 835         if (!error) {
 836                 /* create the socket */
 837                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
 838         }
 839
 840         nameidone(&nd);
 841         vnode_put(dvp);
 842
 843         if (error) {
 844                 return (error);
 845         }
 846         vnode_ref(vp);  /* gain a longterm reference */
 847         vp->v_socket = unp->unp_socket;
 848         unp->unp_vnode = vp;
 849         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
 850         vnode_put(vp);          /* drop the iocount */
 851
 852         return (0);
 853 }
 854
 855
 856 /*
 857  * Returns:     0                       Success
 858  *              EAFNOSUPPORT            Address family not supported
 859  *              EINVAL                  Invalid argument
 860  *              ENOTSOCK                Not a socket
 861  *              ECONNREFUSED            Connection refused
 862  *              EPROTOTYPE              Protocol wrong type for socket
 863  *              EISCONN                 Socket is connected
 864  *      unp_connect2:EPROTOTYPE         Protocol wrong type for socket
 865  *      unp_connect2:EINVAL             Invalid argument
 866  *      namei:???                       [anything namei can return]
 867  *      vnode_authorize:????            [anything vnode_authorize can return]
 868  *
 869  * Notes:       p at this point is the current process, as this function is
 870  *              only called by sosend(), sendfile(), and soconnectlock().
 871  */
 872 static int
 873 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 874 {
 875         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 876         struct vnode *vp;
 877         struct socket *so2, *so3;
 878         struct unpcb *unp, *unp2, *unp3;
 879         vfs_context_t ctx = vfs_context_current();
 880         int error, len;
 881         struct nameidata nd;
 882         char buf[SOCK_MAXADDRLEN];
 883
 884         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
 885                 return (EAFNOSUPPORT);
 886         }
 887
 888         so2 = so3 = NULL;
 889
 890         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 891         if (len <= 0)
 892                 return (EINVAL);
 893
 894         strlcpy(buf, soun->sun_path, len+1);
 895
 896         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32,
 897             CAST_USER_ADDR_T(buf), ctx);
 898         error = namei(&nd);
 899         if (error) {
 900                 return (error);
 901         }
 902         nameidone(&nd);
 903         vp = nd.ni_vp;
 904         if (vp->v_type != VSOCK) {
 905                 error = ENOTSOCK;
 906                 goto bad;
 907         }
 908
 909         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
 910         if (error)
 911                 goto bad;
 912         so2 = vp->v_socket;
 913         if (so2 == 0 || so2->so_pcb == NULL) {
 914                 error = ECONNREFUSED;
 915                 goto bad;
 916         }
 917
 918         /* make sure the socket can't go away while we're connecting */
 919         so2->so_usecount++;
 920
 921         if (so->so_type != so2->so_type) {
 922                 error = EPROTOTYPE;
 923                 goto bad;
 924         }
 925
 926         /*
 927          * Check if socket was connected while we were trying to
 928          * acquire the funnel.
 929          * XXX - probably shouldn't return an error for SOCK_DGRAM
 930          */
 931         if ((so->so_state & SS_ISCONNECTED) != 0) {
 932                 error = EISCONN;
 933                 goto bad;
 934         }
 935
 936         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 937                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
 938                     (so3 = sonewconn(so2, 0, nam)) == 0) {
 939                         error = ECONNREFUSED;
 940                         goto bad;
 941                 }
 942                 unp = sotounpcb(so);
 943                 unp2 = sotounpcb(so2);
 944                 unp3 = sotounpcb(so3);
 945                 if (unp2->unp_addr)
 946                         unp3->unp_addr = (struct sockaddr_un *)
 947                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
 948
 949                 /*
 950                  * unp_peercred management:
 951                  *
 952                  * The connecter's (client's) credentials are copied
 953                  * from its process structure at the time of connect()
 954                  * (which is now).
 955                  */
 956                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
 957                 unp3->unp_flags |= UNP_HAVEPC;
 958                 /*
 959                  * The receiver's (server's) credentials are copied
 960                  * from the unp_peercred member of socket on which the
 961                  * former called listen(); unp_listen() cached that
 962                  * process's credentials at that time so we can use
 963                  * them now.
 964                  */
 965                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 966                     ("unp_connect: listener without cached peercred"));
 967                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 968                     sizeof (unp->unp_peercred));
 969                 unp->unp_flags |= UNP_HAVEPC;
 970
 971 #if CONFIG_MACF_SOCKET
 972                 /* XXXMAC: recursive lock: SOCK_LOCK(so); */
 973                 mac_socketpeer_label_associate_socket(so, so3);
 974                 mac_socketpeer_label_associate_socket(so3, so);
 975                 /* XXXMAC: SOCK_UNLOCK(so); */
 976 #endif /* MAC_SOCKET */
 977                 so2->so_usecount--; /* drop reference taken on so2 */
 978                 so2 = so3;
 979                 so3->so_usecount++; /* make sure we keep it around */
 980         }
 981         error = unp_connect2(so, so2);
 982 bad:
 983         if (so2 != NULL)
 984                 so2->so_usecount--; /* release count on socket */
 985         vnode_put(vp);
 986         return (error);
 987 }
 988
 989 /*
 990  * Returns:     0                       Success
 991  *              EPROTOTYPE              Protocol wrong type for socket
 992  *              EINVAL                  Invalid argument
 993  */
 994 int
 995 unp_connect2(struct socket *so, struct socket *so2)
 996 {
 997         struct unpcb *unp = sotounpcb(so);
 998         struct unpcb *unp2;
 999
1000         if (so2->so_type != so->so_type)
1001                 return (EPROTOTYPE);
1002         unp2 = sotounpcb(so2);
1003
1004         /* Verify both sockets are still opened */
1005         if (unp == 0 || unp2 == 0)
1006                 return (EINVAL);
1007
1008         unp->unp_conn = unp2;
1009         switch (so->so_type) {
1010
1011         case SOCK_DGRAM:
1012                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1013                 soisconnected(so);
1014                 break;
1015
1016         case SOCK_STREAM:
1017                 /* This takes care of socketpair */
1018                 if (!(unp->unp_flags & UNP_HAVEPC) &&
1019                     !(unp2->unp_flags & UNP_HAVEPC)) {
1020                         cru2x(kauth_cred_get(), &unp->unp_peercred);
1021                         unp->unp_flags |= UNP_HAVEPC;
1022
1023                         cru2x(kauth_cred_get(), &unp2->unp_peercred);
1024                         unp2->unp_flags |= UNP_HAVEPC;
1025                 }
1026                 unp2->unp_conn = unp;
1027                 soisconnected(so);
1028                 soisconnected(so2);
1029                 break;
1030
1031         default:
1032                 panic("unp_connect2");
1033         }
1034         return (0);
1035 }
1036
1037 static void
1038 unp_disconnect(struct unpcb *unp)
1039 {
1040         struct unpcb *unp2 = unp->unp_conn;
1041
1042         if (unp2 == 0)
1043                 return;
1044         unp->unp_conn = NULL;
1045         switch (unp->unp_socket->so_type) {
1046
1047         case SOCK_DGRAM:
1048                 lck_rw_lock_exclusive(unp_list_mtx);
1049                 LIST_REMOVE(unp, unp_reflink);
1050                 lck_rw_done(unp_list_mtx);
1051                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
1052                 break;
1053
1054         case SOCK_STREAM:
1055                 soisdisconnected(unp->unp_socket);
1056                 unp2->unp_conn = NULL;
1057                 soisdisconnected(unp2->unp_socket);
1058                 break;
1059         }
1060 }
1061
#ifdef notdef
/*
 * unp_abort
 *
 * Hands the PCB straight to unp_detach().  This definition is only
 * compiled when `notdef' is defined.
 */
void
unp_abort(struct unpcb *unp)
{

        unp_detach(unp);
}
#endif
1070
1071 static int
1072 unp_pcblist SYSCTL_HANDLER_ARGS
1073 {
1074 #pragma unused(oidp,arg2)
1075         int error, i, n;
1076         struct unpcb *unp, **unp_list;
1077         unp_gen_t gencnt;
1078         struct xunpgen xug;
1079         struct unp_head *head;
1080
1081         lck_rw_lock_shared(unp_list_mtx);
1082         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1083
1084         /*
1085          * The process of preparing the PCB list is too time-consuming and
1086          * resource-intensive to repeat twice on every request.
1087          */
1088         if (req->oldptr == USER_ADDR_NULL) {
1089                 n = unp_count;
1090                 req->oldidx = 2 * sizeof (xug) + (n + n / 8) *
1091                     sizeof (struct xunpcb);
1092                 lck_rw_done(unp_list_mtx);
1093                 return (0);
1094         }
1095
1096         if (req->newptr != USER_ADDR_NULL) {
1097                 lck_rw_done(unp_list_mtx);
1098                 return (EPERM);
1099         }
1100
1101         /*
1102          * OK, now we're committed to doing something.
1103          */
1104         gencnt = unp_gencnt;
1105         n = unp_count;
1106
1107         bzero(&xug, sizeof (xug));
1108         xug.xug_len = sizeof (xug);
1109         xug.xug_count = n;
1110         xug.xug_gen = gencnt;
1111         xug.xug_sogen = so_gencnt;
1112         error = SYSCTL_OUT(req, &xug, sizeof (xug));
1113         if (error) {
1114                 lck_rw_done(unp_list_mtx);
1115                 return (error);
1116         }
1117
1118         /*
1119          * We are done if there is no pcb
1120          */
1121         if (n == 0)  {
1122                 lck_rw_done(unp_list_mtx);
1123                 return (0);
1124         }
1125
1126         MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list),
1127             M_TEMP, M_WAITOK);
1128         if (unp_list == 0) {
1129                 lck_rw_done(unp_list_mtx);
1130                 return (ENOMEM);
1131         }
1132
1133         for (unp = head->lh_first, i = 0; unp && i < n;
1134             unp = unp->unp_link.le_next) {
1135                 if (unp->unp_gencnt <= gencnt)
1136                         unp_list[i++] = unp;
1137         }
1138         n = i;                  /* in case we lost some during malloc */
1139
1140         error = 0;
1141         for (i = 0; i < n; i++) {
1142                 unp = unp_list[i];
1143                 if (unp->unp_gencnt <= gencnt) {
1144                         struct xunpcb xu;
1145
1146                         bzero(&xu, sizeof (xu));
1147                         xu.xu_len = sizeof (xu);
1148                         xu.xu_unpp = (struct  unpcb_compat *)unp;
1149                         /*
1150                          * XXX - need more locking here to protect against
1151                          * connect/disconnect races for SMP.
1152                          */
1153                         if (unp->unp_addr)
1154                                 bcopy(unp->unp_addr, &xu.xu_addr,
1155                                     unp->unp_addr->sun_len);
1156                         if (unp->unp_conn && unp->unp_conn->unp_addr)
1157                                 bcopy(unp->unp_conn->unp_addr,
1158                                     &xu.xu_caddr,
1159                                     unp->unp_conn->unp_addr->sun_len);
1160                         bcopy(unp, &xu.xu_unp, sizeof (xu.xu_unp));
1161                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
1162                         error = SYSCTL_OUT(req, &xu, sizeof (xu));
1163                 }
1164         }
1165         if (!error) {
1166                 /*
1167                  * Give the user an updated idea of our state.
1168                  * If the generation differs from what we told
1169                  * her before, she knows that something happened
1170                  * while we were processing this request, and it
1171                  * might be necessary to retry.
1172                  */
1173                 bzero(&xug, sizeof (xug));
1174                 xug.xug_len = sizeof (xug);
1175                 xug.xug_gen = unp_gencnt;
1176                 xug.xug_sogen = so_gencnt;
1177                 xug.xug_count = unp_count;
1178                 error = SYSCTL_OUT(req, &xug, sizeof (xug));
1179         }
1180         FREE(unp_list, M_TEMP);
1181         lck_rw_done(unp_list_mtx);
1182         return (error);
1183 }
1184
/*
 * Register unp_pcblist() as the handler for the read-only (CTLFLAG_RD)
 * "pcblist" node under both _net_local_dgram and _net_local_stream.
 * The socket type is passed as arg1, which the handler uses to pick
 * the PCB list to export.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
            (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
            (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local stream sockets");
1191
1192 static void
1193 unp_shutdown(struct unpcb *unp)
1194 {
1195         struct socket *so;
1196
1197         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
1198             (so = unp->unp_conn->unp_socket))
1199                 socantrcvmore(so);
1200 }
1201
1202 static void
1203 unp_drop(struct unpcb *unp, int errno)
1204 {
1205         struct socket *so = unp->unp_socket;
1206
1207         so->so_error = errno;
1208         unp_disconnect(unp);
1209 }
1210
#ifdef notdef
/*
 * unp_drain
 *
 * Empty placeholder; only compiled when `notdef' is defined.
 */
void
unp_drain()
{

}
#endif
1218
1219 /*
1220  * Returns:     0                       Success
1221  *              EMSGSIZE                The new fd's will not fit
1222  *              ENOBUFS                 Cannot alloc struct fileproc
1223  */
1224 int
1225 unp_externalize(struct mbuf *rights)
1226 {
1227         proc_t p = current_proc();              /* XXX */
1228         int i;
1229         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1230         struct fileglob **rp = (struct fileglob **)(cm + 1);
1231         struct fileproc *fp;
1232         struct fileglob *fg;
1233         int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1234         int f;
1235
1236         proc_fdlock(p);
1237
1238         /*
1239          * if the new FD's will not fit, then we free them all
1240          */
1241         if (!fdavail(p, newfds)) {
1242                 for (i = 0; i < newfds; i++) {
1243                         fg = *rp;
1244                         unp_discard_fdlocked(fg, p);
1245                         *rp++ = NULL;
1246                 }
1247                 proc_fdunlock(p);
1248
1249                 return (EMSGSIZE);
1250         }
1251         /*
1252          * now change each pointer to an fd in the global table to
1253          * an integer that is the index to the local fd table entry
1254          * that we set up to point to the global one we are transferring.
1255          * XXX (1) this assumes a pointer and int are the same size...!
1256          * XXX (2) allocation failures should be non-fatal
1257          */
1258         for (i = 0; i < newfds; i++) {
1259 #if CONFIG_MACF_SOCKET
1260                 /*
1261                  * If receive access is denied, don't pass along
1262                  * and error message, just discard the descriptor.
1263                  */
1264                 if (mac_file_check_receive(kauth_cred_get(), *rp)) {
1265                         fg = *rp;
1266                         *rp++ = 0;
1267                         unp_discard_fdlocked(fg, p);
1268                         continue;
1269                 }
1270 #endif
1271                 if (fdalloc(p, 0, &f))
1272                         panic("unp_externalize:fdalloc");
1273                 fg = *rp;
1274                 MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc),
1275                     M_FILEPROC, M_WAITOK);
1276                 if (fp == NULL)
1277                         panic("unp_externalize: MALLOC_ZONE");
1278                 bzero(fp, sizeof (struct fileproc));
1279                 fp->f_iocount = 0;
1280                 fp->f_fglob = fg;
1281                 fg_removeuipc(fg);
1282                 procfdtbl_releasefd(p, f, fp);
1283                 (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights);
1284                 *(int *)rp++ = f;
1285         }
1286         proc_fdunlock(p);
1287
1288         return (0);
1289 }
1290
1291 void
1292 unp_init(void)
1293 {
1294         unp_zone = zinit(sizeof (struct unpcb),
1295             (nmbclusters * sizeof (struct unpcb)), 4096, "unpzone");
1296
1297         if (unp_zone == 0)
1298                 panic("unp_init");
1299         LIST_INIT(&unp_dhead);
1300         LIST_INIT(&unp_shead);
1301
1302         /*
1303          * allocate lock group attribute and group for udp pcb mutexes
1304          */
1305         unp_mtx_grp_attr = lck_grp_attr_alloc_init();
1306
1307         unp_mtx_grp = lck_grp_alloc_init("unp_list", unp_mtx_grp_attr);
1308
1309         unp_mtx_attr = lck_attr_alloc_init();
1310
1311         if ((unp_list_mtx = lck_rw_alloc_init(unp_mtx_grp,
1312             unp_mtx_attr)) == NULL)
1313                 return; /* pretty much dead if this fails... */
1314
1315 }
1316
/*
 * Fallback definition of MIN, used only when none is already in scope.
 * Each argument may be evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
1320
1321 /*
1322  * Returns:     0                       Success
1323  *              EINVAL
1324  *      fdgetf_noref:EBADF
1325  */
1326 static int
1327 unp_internalize(struct mbuf *control, proc_t p)
1328 {
1329         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1330         struct fileglob **rp;
1331         struct fileproc *fp;
1332         int i, error;
1333         int oldfds;
1334
1335         /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
1336         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1337             (unsigned long)cm->cmsg_len != (unsigned long)control->m_len) {
1338                 return (EINVAL);
1339         }
1340         oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
1341
1342         proc_fdlock(p);
1343         rp = (struct fileglob **)(cm + 1);
1344
1345         for (i = 0; i < oldfds; i++) {
1346                 if ((error = fdgetf_noref(p, *(int *)rp++, NULL)) != 0) {
1347                         proc_fdunlock(p);
1348                         return (error);
1349                 }
1350         }
1351         rp = (struct fileglob **)(cm + 1);
1352
1353         for (i = 0; i < oldfds; i++) {
1354                 (void) fdgetf_noref(p, *(int *)rp, &fp);
1355                 fg_insertuipc(fp->f_fglob);
1356                 *rp++ = fp->f_fglob;
1357                 (void) OSAddAtomic(1, (volatile SInt32 *)&unp_rights);
1358         }
1359         proc_fdunlock(p);
1360
1361         return (0);
1362 }
1363
/*
 * Garbage-collector state:
 *   unp_defer   count of fileglobs flagged FDEFER; raised by unp_mark()
 *               and lowered by unp_gc() as it processes them
 *   unp_gcing   nonzero while a unp_gc() pass is running
 *   unp_gcwait  set by unp_gc_wait() before it sleeps; cleared by
 *               unp_gc(), which then issues the wakeup
 */
static int      unp_defer, unp_gcing, unp_gcwait;
1365
1366 /* always called under uipc_lock */
1367 void
1368 unp_gc_wait(void)
1369 {
1370         while (unp_gcing != 0) {
1371                 unp_gcwait = 1;
1372                 msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
1373         }
1374 }
1375
1376
1377 static void
1378 unp_gc(void)
1379 {
1380         struct fileglob *fg, *nextfg;
1381         struct socket *so;
1382         struct fileglob **extra_ref, **fpp;
1383         int nunref, i;
1384         int need_gcwakeup = 0;
1385
1386         lck_mtx_lock(uipc_lock);
1387         if (unp_gcing) {
1388                 lck_mtx_unlock(uipc_lock);
1389                 return;
1390         }
1391         unp_gcing = 1;
1392         unp_defer = 0;
1393         lck_mtx_unlock(uipc_lock);
1394         /*
1395          * before going through all this, set all FDs to
1396          * be NOT defered and NOT externally accessible
1397          */
1398         for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {
1399                 lck_mtx_lock(&fg->fg_lock);
1400                 fg->fg_flag &= ~(FMARK|FDEFER);
1401                 lck_mtx_unlock(&fg->fg_lock);
1402         }
1403         do {
1404                 for (fg = fmsghead.lh_first; fg != 0;
1405                     fg = fg->f_msglist.le_next) {
1406                         lck_mtx_lock(&fg->fg_lock);
1407                         /*
1408                          * If the file is not open, skip it
1409                          */
1410                         if (fg->fg_count == 0) {
1411                                 lck_mtx_unlock(&fg->fg_lock);
1412                                 continue;
1413                         }
1414                         /*
1415                          * If we already marked it as 'defer'  in a
1416                          * previous pass, then try process it this time
1417                          * and un-mark it
1418                          */
1419                         if (fg->fg_flag & FDEFER) {
1420                                 fg->fg_flag &= ~FDEFER;
1421                                 unp_defer--;
1422                         } else {
1423                                 /*
1424                                  * if it's not defered, then check if it's
1425                                  * already marked.. if so skip it
1426                                  */
1427                                 if (fg->fg_flag & FMARK) {
1428                                         lck_mtx_unlock(&fg->fg_lock);
1429                                         continue;
1430                                 }
1431                                 /*
1432                                  * If all references are from messages
1433                                  * in transit, then skip it. it's not
1434                                  * externally accessible.
1435                                  */
1436                                 if (fg->fg_count == fg->fg_msgcount) {
1437                                         lck_mtx_unlock(&fg->fg_lock);
1438                                         continue;
1439                                 }
1440                                 /*
1441                                  * If it got this far then it must be
1442                                  * externally accessible.
1443                                  */
1444                                 fg->fg_flag |= FMARK;
1445                         }
1446                         /*
1447                          * either it was defered, or it is externally
1448                          * accessible and not already marked so.
1449                          * Now check if it is possibly one of OUR sockets.
1450                          */
1451                         if (fg->fg_type != DTYPE_SOCKET ||
1452                             (so = (struct socket *)fg->fg_data) == 0) {
1453                                 lck_mtx_unlock(&fg->fg_lock);
1454                                 continue;
1455                         }
1456                         if (so->so_proto->pr_domain != &localdomain ||
1457                             (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
1458                                 lck_mtx_unlock(&fg->fg_lock);
1459                                 continue;
1460                         }
1461 #ifdef notdef
1462                         /*
1463                          * if this code is enabled need to run
1464                          * under network funnel
1465                          */
1466                         if (so->so_rcv.sb_flags & SB_LOCK) {
1467                                 /*
1468                                  * This is problematical; it's not clear
1469                                  * we need to wait for the sockbuf to be
1470                                  * unlocked (on a uniprocessor, at least),
1471                                  * and it's also not clear what to do
1472                                  * if sbwait returns an error due to receipt
1473                                  * of a signal.  If sbwait does return
1474                                  * an error, we'll go into an infinite
1475                                  * loop.  Delete all of this for now.
1476                                  */
1477                                 (void) sbwait(&so->so_rcv);
1478                                 goto restart;
1479                         }
1480 #endif
1481                         /*
1482                          * So, Ok, it's one of our sockets and it IS externally
1483                          * accessible (or was defered). Now we look
1484                          * to see if we hold any file descriptors in its
1485                          * message buffers. Follow those links and mark them
1486                          * as accessible too.
1487                          */
1488                         unp_scan(so->so_rcv.sb_mb, unp_mark);
1489                         lck_mtx_unlock(&fg->fg_lock);
1490                 }
1491         } while (unp_defer);
1492         /*
1493          * We grab an extra reference to each of the file table entries
1494          * that are not otherwise accessible and then free the rights
1495          * that are stored in messages on them.
1496          *
1497          * The bug in the orginal code is a little tricky, so I'll describe
1498          * what's wrong with it here.
1499          *
1500          * It is incorrect to simply unp_discard each entry for f_msgcount
1501          * times -- consider the case of sockets A and B that contain
1502          * references to each other.  On a last close of some other socket,
1503          * we trigger a gc since the number of outstanding rights (unp_rights)
1504          * is non-zero.  If during the sweep phase the gc code un_discards,
1505          * we end up doing a (full) closef on the descriptor.  A closef on A
1506          * results in the following chain.  Closef calls soo_close, which
1507          * calls soclose.   Soclose calls first (through the switch
1508          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
1509          * returns because the previous instance had set unp_gcing, and
1510          * we return all the way back to soclose, which marks the socket
1511          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
1512          * to free up the rights that are queued in messages on the socket A,
1513          * i.e., the reference on B.  The sorflush calls via the dom_dispose
1514          * switch unp_dispose, which unp_scans with unp_discard.  This second
1515          * instance of unp_discard just calls closef on B.
1516          *
1517          * Well, a similar chain occurs on B, resulting in a sorflush on B,
1518          * which results in another closef on A.  Unfortunately, A is already
1519          * being closed, and the descriptor has already been marked with
1520          * SS_NOFDREF, and soclose panics at this point.
1521          *
1522          * Here, we first take an extra reference to each inaccessible
1523          * descriptor.  Then, we call sorflush ourself, since we know
1524          * it is a Unix domain socket anyhow.  After we destroy all the
1525          * rights carried in messages, we do a last closef to get rid
1526          * of our extra reference.  This is the last close, and the
1527          * unp_detach etc will shut down the socket.
1528          *
1529          * 91/09/19, bsy@cs.cmu.edu
1530          */
1531         extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *),
1532             M_FILEGLOB, M_WAITOK);
1533         for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0;
1534             fg = nextfg) {
1535                 lck_mtx_lock(&fg->fg_lock);
1536
1537                 nextfg = fg->f_msglist.le_next;
1538                 /*
1539                  * If it's not open, skip it
1540                  */
1541                 if (fg->fg_count == 0) {
1542                         lck_mtx_unlock(&fg->fg_lock);
1543                         continue;
1544                 }
1545                 /*
1546                  * If all refs are from msgs, and it's not marked accessible
1547                  * then it must be referenced from some unreachable cycle
1548                  * of (shut-down) FDs, so include it in our
1549                  * list of FDs to remove
1550                  */
1551                 if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) {
1552                         fg->fg_count++;
1553                         *fpp++ = fg;
1554                         nunref++;
1555                 }
1556                 lck_mtx_unlock(&fg->fg_lock);
1557         }
1558         /*
1559          * for each FD on our hit list, do the following two things
1560          */
1561         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1562                 struct fileglob *tfg;
1563
1564                 tfg = *fpp;
1565
1566                 if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
1567                         int locked = 0;
1568
1569                         so = (struct socket *)(tfg->fg_data);
1570
1571                         /* XXXX */
1572                         /* Assume local sockets use a global lock */
1573                         if (so->so_proto->pr_domain->dom_family != PF_LOCAL) {
1574                                 socket_lock(so, 0);
1575                                 locked = 1;
1576                         }
1577                         sorflush(so);
1578
1579                         if (locked)
1580                                 socket_unlock(so, 0);
1581                 }
1582         }
1583         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
1584                 closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL);
1585
1586         lck_mtx_lock(uipc_lock);
1587         unp_gcing = 0;
1588
1589         if (unp_gcwait != 0) {
1590                 unp_gcwait = 0;
1591                 need_gcwakeup = 1;
1592         }
1593         lck_mtx_unlock(uipc_lock);
1594
1595         if (need_gcwakeup != 0)
1596                 wakeup(&unp_gcing);
1597         FREE((caddr_t)extra_ref, M_FILEGLOB);
1598 }
1599
1600 void
1601 unp_dispose(struct mbuf *m)
1602 {
1603         if (m) {
1604                 unp_scan(m, unp_discard);
1605         }
1606 }
1607
1608 /*
1609  * Returns:     0                       Success
1610  */
1611 static int
1612 unp_listen(struct unpcb *unp, proc_t p)
1613 {
1614         kauth_cred_t safecred = kauth_cred_proc_ref(p);
1615         cru2x(safecred, &unp->unp_peercred);
1616         kauth_cred_unref(&safecred);
1617         unp->unp_flags |= UNP_HAVEPCCACHED;
1618         return (0);
1619 }
1620
1621 /* should run under kernel funnel */
1622 static void
1623 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *))
1624 {
1625         struct mbuf *m;
1626         struct fileglob **rp;
1627         struct cmsghdr *cm;
1628         int i;
1629         int qfds;
1630
1631         while (m0) {
1632                 for (m = m0; m; m = m->m_next)
1633                         if (m->m_type == MT_CONTROL &&
1634                             (size_t)m->m_len >= sizeof (*cm)) {
1635                                 cm = mtod(m, struct cmsghdr *);
1636                                 if (cm->cmsg_level != SOL_SOCKET ||
1637                                     cm->cmsg_type != SCM_RIGHTS)
1638                                         continue;
1639                                 qfds = (cm->cmsg_len - sizeof (*cm)) /
1640                                     sizeof (struct fileglob *);
1641                                 rp = (struct fileglob **)(cm + 1);
1642                                 for (i = 0; i < qfds; i++)
1643                                         (*op)(*rp++);
1644                                 break;          /* XXX, but saves time */
1645                         }
1646                 m0 = m0->m_act;
1647         }
1648 }
1649
1650 /* should run under kernel funnel */
1651 static void
1652 unp_mark(struct fileglob *fg)
1653 {
1654         lck_mtx_lock(&fg->fg_lock);
1655
1656         if (fg->fg_flag & FMARK) {
1657                 lck_mtx_unlock(&fg->fg_lock);
1658                 return;
1659         }
1660         fg->fg_flag |= (FMARK|FDEFER);
1661
1662         lck_mtx_unlock(&fg->fg_lock);
1663
1664         unp_defer++;
1665 }
1666
1667 /* should run under kernel funnel */
1668 static void
1669 unp_discard(struct fileglob *fg)
1670 {
1671         proc_t p = current_proc();              /* XXX */
1672
1673         (void) OSAddAtomic(1, (volatile SInt32 *)&unp_disposed);
1674
1675         proc_fdlock(p);
1676         unp_discard_fdlocked(fg, p);
1677         proc_fdunlock(p);
1678 }
1679 static void
1680 unp_discard_fdlocked(struct fileglob *fg, proc_t p)
1681 {
1682         fg_removeuipc(fg);
1683
1684         (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights);
1685         (void) closef_locked((struct fileproc *)0, fg, p);
1686 }