bsd/kern/kern_descrip.c

   1 /*
   2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/vnode_internal.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/file_internal.h>
  83 #include <sys/socket.h>
  84 #include <sys/socketvar.h>
  85 #include <sys/stat.h>
  86 #include <sys/ioctl.h>
  87 #include <sys/fcntl.h>
  88 #include <sys/malloc.h>
  89 #include <sys/mman.h>
  90 #include <sys/syslog.h>
  91 #include <sys/unistd.h>
  92 #include <sys/resourcevar.h>
  93 #include <sys/aio_kern.h>
  94 #include <sys/ev.h>
  95 #include <kern/lock.h>
  96 #include <sys/uio_internal.h>
  97
  98 #include <security/audit/audit.h>
  99
 100 #include <sys/mount_internal.h>
 101 #include <sys/kdebug.h>
 102 #include <sys/sysproto.h>
 103 #include <sys/pipe.h>
 104 #include <sys/spawn.h>
 105 #include <kern/kern_types.h>
 106 #include <kern/kalloc.h>
 107 #include <libkern/OSAtomic.h>
 108
 109 #include <sys/ubc_internal.h>
 110
 111 #include <kern/ipc_misc.h>
 112 #include <vm/vm_protos.h>
 113
 114 #include <mach/mach_port.h>
 115
 116 #if CONFIG_PROTECT
 117 #include <sys/cprotect.h>
 118 #endif
 119 #include <hfs/hfs.h>
 120
 121 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
 122     mach_msg_type_name_t, ipc_port_t *);
 123 void ipc_port_release_send(ipc_port_t);
 124
 125 struct psemnode;
 126 struct pshmnode;
 127
 128 static int finishdup(proc_t p,
 129     struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
 130
 131 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
 132 void fg_drop(struct fileproc * fp);
 133 void fg_free(struct fileglob *fg);
 134 void fg_ref(struct fileproc * fp);
 135 void fileport_releasefg(struct fileglob *fg);
 136
 137 /* flags for close_internal_locked */
 138 #define FD_DUP2RESV 1
 139 static int close_internal_locked(struct proc *p, int fd, struct fileproc *fp, int flags);
 140
 141 static int closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx);
 142
 143 /* We don't want these exported */
 144 __private_extern__
 145 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
 146
 147 __private_extern__
 148 int unlink1(vfs_context_t, struct nameidata *, int);
 149
 150 static void _fdrelse(struct proc * p, int fd);
 151
 152
 153 extern void file_lock_init(void) __attribute__((section("__TEXT, initcode")));
 154 extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat4, proc_t p);
 155 #if SOCKETS
 156 extern int soo_stat(struct socket *so, void *ub, int isstat64);
 157 #endif /* SOCKETS */
 158
 159 extern kauth_scope_t    kauth_scope_fileop;
 160
 161 extern int cs_debug;
 162
 163 /* Conflict wait queue for when selects collide (opaque type) */
 164 extern struct wait_queue select_conflict_queue;
 165
 166 #define f_flag f_fglob->fg_flag
 167 #define f_type f_fglob->fg_type
 168 #define f_msgcount f_fglob->fg_msgcount
 169 #define f_cred f_fglob->fg_cred
 170 #define f_ops f_fglob->fg_ops
 171 #define f_offset f_fglob->fg_offset
 172 #define f_data f_fglob->fg_data
 173 /*
 174  * Descriptor management.
 175  */
 176 struct fmsglist fmsghead;       /* head of list of open files */
 177 struct fmsglist fmsg_ithead;    /* head of list of open files */
 178 int nfiles;                     /* actual number of open files */
 179
 180
 181 lck_grp_attr_t * file_lck_grp_attr;
 182 lck_grp_t * file_lck_grp;
 183 lck_attr_t * file_lck_attr;
 184
 185 lck_mtx_t * uipc_lock;
 186
 187
 188 /*
 189  * file_lock_init
 190  *
 191  * Description: Initialize the file lock group and the uipc and flist locks
 192  *
 193  * Parameters:  (void)
 194  *
 195  * Returns:     void
 196  *
 197  * Notes:       Called at system startup from bsd_init().
 198  */
 199 void
 200 file_lock_init(void)
 201 {
 202         /* allocate file lock group attribute and group */
 203         file_lck_grp_attr= lck_grp_attr_alloc_init();
 204
 205         file_lck_grp = lck_grp_alloc_init("file",  file_lck_grp_attr);
 206
 207         /* Allocate file lock attribute */
 208         file_lck_attr = lck_attr_alloc_init();
 209
 210         uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
 211 }
 212
 213
 214 /*
 215  * proc_fdlock, proc_fdlock_spin
 216  *
 217  * Description: Lock to control access to the per process struct fileproc
 218  *              and struct filedesc
 219  *
 220  * Parameters:  p                               Process to take the lock on
 221  *
 222  * Returns:     void
 223  *
 224  * Notes:       The lock is initialized in forkproc() and destroyed in
 225  *              reap_child_process().
 226  */
 227 void
 228 proc_fdlock(proc_t p)
 229 {
 230         lck_mtx_lock(&p->p_fdmlock);
 231 }
 232
 233 void
 234 proc_fdlock_spin(proc_t p)
 235 {
 236         lck_mtx_lock_spin(&p->p_fdmlock);
 237 }
 238
 239 void
 240 proc_fdlock_assert(proc_t p, int assertflags)
 241 {
 242         lck_mtx_assert(&p->p_fdmlock, assertflags);
 243 }
 244
 245
 246 /*
 247  * proc_fdunlock
 248  *
 249  * Description: Unlock the lock previously locked by a call to proc_fdlock()
 250  *
 251  * Parameters:  p                               Process to drop the lock on
 252  *
 253  * Returns:     void
 254  */
 255 void
 256 proc_fdunlock(proc_t p)
 257 {
 258         lck_mtx_unlock(&p->p_fdmlock);
 259 }
 260
 261
 262 /*
 263  * System calls on descriptors.
 264  */
 265
 266
 267 /*
 268  * getdtablesize
 269  *
 270  * Description: Returns the per process maximum size of the descriptor table
 271  *
 272  * Parameters:  p                               Process being queried
 273  *              retval                          Pointer to the call return area
 274  *
 275  * Returns:     0                               Success
 276  *
 277  * Implicit returns:
 278  *              *retval (modified)              Size of dtable
 279  */
 280 int
 281 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
 282 {
 283         proc_fdlock_spin(p);
 284         *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 285         proc_fdunlock(p);
 286
 287         return (0);
 288 }
 289
 290
 291 void
 292 procfdtbl_reservefd(struct proc * p, int fd)
 293 {
 294         p->p_fd->fd_ofiles[fd] = NULL;
 295         p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
 296 }
 297
 298 void
 299 procfdtbl_markclosefd(struct proc * p, int fd)
 300 {
 301         p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
 302 }
 303
 304 void
 305 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
 306 {
 307         if (fp != NULL)
 308                 p->p_fd->fd_ofiles[fd] = fp;
 309         p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
 310         if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
 311                 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
 312                 wakeup(&p->p_fd);
 313         }
 314 }
 315
 316 void
 317 procfdtbl_waitfd(struct proc * p, int fd)
 318 {
 319         p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
 320         msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
 321 }
 322
 323
 324 void
 325 procfdtbl_clearfd(struct proc * p, int fd)
 326 {
 327         int waiting;
 328
 329         waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
 330         p->p_fd->fd_ofiles[fd] = NULL;
 331         p->p_fd->fd_ofileflags[fd] = 0;
 332         if ( waiting == UF_RESVWAIT) {
 333                 wakeup(&p->p_fd);
 334         }
 335 }
 336
 337 /*
 338  * _fdrelse
 339  *
 340  * Description: Inline utility function to free an fd in a filedesc
 341  *
 342  * Parameters:  fdp                             Pointer to filedesc fd lies in
 343  *              fd                              fd to free
 344  *              reserv                          fd should be reserved
 345  *
 346  * Returns:     void
 347  *
 348  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
 349  *              the caller
 350  */
 351 static void
 352 _fdrelse(struct proc * p, int fd)
 353 {
 354         struct filedesc *fdp = p->p_fd;
 355         int nfd = 0;
 356
 357         if (fd < fdp->fd_freefile)
 358                 fdp->fd_freefile = fd;
 359 #if DIAGNOSTIC
 360         if (fd > fdp->fd_lastfile)
 361                 panic("fdrelse: fd_lastfile inconsistent");
 362 #endif
 363         procfdtbl_clearfd(p, fd);
 364
 365         while ((nfd = fdp->fd_lastfile) > 0 &&
 366                         fdp->fd_ofiles[nfd] == NULL &&
 367                         !(fdp->fd_ofileflags[nfd] & UF_RESERVED))
 368                 fdp->fd_lastfile--;
 369 }
 370
 371
 372 int
 373 fd_rdwr(
 374         int fd,
 375         enum uio_rw rw,
 376         uint64_t base,
 377         int64_t len,
 378         enum uio_seg segflg,
 379         off_t   offset,
 380         int     io_flg,
 381         int64_t *aresid)
 382 {
 383         struct fileproc *fp;
 384         proc_t  p;
 385         int error = 0;
 386         int flags = 0;
 387         int spacetype;
 388         uio_t auio = NULL;
 389         char uio_buf[ UIO_SIZEOF(1) ];
 390         struct vfs_context context = *(vfs_context_current());
 391
 392         p = current_proc();
 393
 394         error = fp_lookup(p, fd, &fp, 0);
 395         if (error)
 396                 return(error);
 397
 398         if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
 399                 error = EINVAL;
 400                 goto out;
 401         }
 402         if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
 403                 error = EBADF;
 404                 goto out;
 405         }
 406
 407         if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
 408                 error = EBADF;
 409                 goto out;
 410         }
 411
 412         context.vc_ucred = fp->f_fglob->fg_cred;
 413
 414         if (UIO_SEG_IS_USER_SPACE(segflg))
 415                 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
 416         else
 417                 spacetype = UIO_SYSSPACE;
 418
 419         auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
 420
 421         uio_addiov(auio, base, len);
 422
 423         if ( !(io_flg & IO_APPEND))
 424                 flags = FOF_OFFSET;
 425
 426         if (rw == UIO_WRITE)
 427                 error = fo_write(fp, auio, flags, &context);
 428         else
 429                 error = fo_read(fp, auio, flags, &context);
 430
 431         if (aresid)
 432                 *aresid = uio_resid(auio);
 433         else {
 434                 if (uio_resid(auio) && error == 0)
 435                         error = EIO;
 436         }
 437 out:
 438         if (rw == UIO_WRITE && error == 0)
 439                 fp_drop_written(p, fd, fp);
 440         else
 441                 fp_drop(p, fd, fp, 0);
 442
 443         return error;
 444 }
 445
 446
 447
 448 /*
 449  * dup
 450  *
 451  * Description: Duplicate a file descriptor.
 452  *
 453  * Parameters:  p                               Process performing the dup
 454  *              uap->fd                         The fd to dup
 455  *              retval                          Pointer to the call return area
 456  *
 457  * Returns:     0                               Success
 458  *              !0                              Errno
 459  *
 460  * Implicit returns:
 461  *              *retval (modified)              The new descriptor
 462  */
 463 int
 464 dup(proc_t p, struct dup_args *uap, int32_t *retval)
 465 {
 466         struct filedesc *fdp = p->p_fd;
 467         int old = uap->fd;
 468         int new, error;
 469         struct fileproc *fp;
 470
 471         proc_fdlock(p);
 472         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
 473                 proc_fdunlock(p);
 474                 return(error);
 475         }
 476         if ( (error = fdalloc(p, 0, &new)) ) {
 477                 fp_drop(p, old, fp, 1);
 478                 proc_fdunlock(p);
 479                 return (error);
 480         }
 481         error = finishdup(p, fdp, old, new, 0, retval);
 482         fp_drop(p, old, fp, 1);
 483         proc_fdunlock(p);
 484
 485         return (error);
 486 }
 487
 488 /*
 489  * dup2
 490  *
 491  * Description: Duplicate a file descriptor to a particular value.
 492  *
 493  * Parameters:  p                               Process performing the dup
 494  *              uap->from                       The fd to dup
 495  *              uap->to                         The fd to dup it to
 496  *              retval                          Pointer to the call return area
 497  *
 498  * Returns:     0                               Success
 499  *              !0                              Errno
 500  *
 501  * Implicit returns:
 502  *              *retval (modified)              The new descriptor
 503  */
 504 int
 505 dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
 506 {
 507         struct filedesc *fdp = p->p_fd;
 508         int old = uap->from, new = uap->to;
 509         int i, error;
 510         struct fileproc *fp, *nfp;
 511
 512         proc_fdlock(p);
 513
 514 startover:
 515         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
 516                 proc_fdunlock(p);
 517                 return(error);
 518         }
 519         if (new < 0 ||
 520                 (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 521             new >= maxfiles) {
 522                 fp_drop(p, old, fp, 1);
 523                 proc_fdunlock(p);
 524                 return (EBADF);
 525         }
 526         if (old == new) {
 527                 fp_drop(p, old, fp, 1);
 528                 *retval = new;
 529                 proc_fdunlock(p);
 530                 return (0);
 531         }
 532         if (new < 0 || new >= fdp->fd_nfiles) {
 533                 if ( (error = fdalloc(p, new, &i)) ) {
 534                         fp_drop(p, old, fp, 1);
 535                         proc_fdunlock(p);
 536                         return (error);
 537                 }
 538                 if (new != i) {
 539                         fdrelse(p, i);
 540                         goto closeit;
 541                 }
 542         } else {
 543 closeit:
 544                 while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED)  {
 545                                 fp_drop(p, old, fp, 1);
 546                                 procfdtbl_waitfd(p, new);
 547 #if DIAGNOSTIC
 548                                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 549 #endif
 550                                 goto startover;
 551                 }
 552
 553                 if ((fdp->fd_ofiles[new] != NULL) &&
 554                     ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
 555                         fp_drop(p, old, fp, 1);
 556                         (void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
 557 #if DIAGNOSTIC
 558                         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 559 #endif
 560                         procfdtbl_clearfd(p, new);
 561                         goto startover;
 562                 } else  {
 563 #if DIAGNOSTIC
 564                         if (fdp->fd_ofiles[new] != NULL)
 565                                 panic("dup2: no ref on fileproc %d", new);
 566 #endif
 567                         procfdtbl_reservefd(p, new);
 568                 }
 569
 570 #if DIAGNOSTIC
 571                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 572 #endif
 573
 574         }
 575 #if DIAGNOSTIC
 576         if (fdp->fd_ofiles[new] != 0)
 577                 panic("dup2: overwriting fd_ofiles with new %d", new);
 578         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
 579                 panic("dup2: unreserved fileflags with new %d", new);
 580 #endif
 581         error = finishdup(p, fdp, old, new, 0, retval);
 582         fp_drop(p, old, fp, 1);
 583         proc_fdunlock(p);
 584
 585         return(error);
 586 }
 587
 588
 589 /*
 590  * fcntl
 591  *
 592  * Description: The file control system call.
 593  *
 594  * Parameters:  p                               Process performing the fcntl
 595  *              uap->fd                         The fd to operate against
 596  *              uap->cmd                        The command to perform
 597  *              uap->arg                        Pointer to the command argument
 598  *              retval                          Pointer to the call return area
 599  *
 600  * Returns:     0                               Success
 601  *              !0                              Errno (see fcntl_nocancel)
 602  *
 603  * Implicit returns:
 604  *              *retval (modified)              fcntl return value (if any)
 605  *
 606  * Notes:       This system call differs from fcntl_nocancel() in that it
 607  *              tests for cancellation prior to performing a potentially
 608  *              blocking operation.
 609  */
 610 int
 611 fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
 612 {
 613         __pthread_testcancel(1);
 614         return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval));
 615 }
 616
 617
 618 /*
 619  * fcntl_nocancel
 620  *
 621  * Description: A non-cancel-testing file control system call.
 622  *
 623  * Parameters:  p                               Process performing the fcntl
 624  *              uap->fd                         The fd to operate against
 625  *              uap->cmd                        The command to perform
 626  *              uap->arg                        Pointer to the command argument
 627  *              retval                          Pointer to the call return area
 628  *
 629  * Returns:     0                               Success
 630  *              EINVAL
 631  *      fp_lookup:EBADF                         Bad file descriptor
 632  * [F_DUPFD]
 633  *      fdalloc:EMFILE
 634  *      fdalloc:ENOMEM
 635  *      finishdup:EBADF
 636  *      finishdup:ENOMEM
 637  * [F_SETOWN]
 638  *              ESRCH
 639  * [F_SETLK]
 640  *              EBADF
 641  *              EOVERFLOW
 642  *      copyin:EFAULT
 643  *      vnode_getwithref:???
 644  *      VNOP_ADVLOCK:???
 645  * [F_GETLK]
 646  *              EBADF
 647  *              EOVERFLOW
 648  *      copyin:EFAULT
 649  *      copyout:EFAULT
 650  *      vnode_getwithref:???
 651  *      VNOP_ADVLOCK:???
 652  * [F_PREALLOCATE]
 653  *              EBADF
 654  *              EINVAL
 655  *      copyin:EFAULT
 656  *      copyout:EFAULT
 657  *      vnode_getwithref:???
 658  *      VNOP_ALLOCATE:???
 659  * [F_SETSIZE,F_RDADVISE]
 660  *              EBADF
 661  *      copyin:EFAULT
 662  *      vnode_getwithref:???
 663  * [F_RDAHEAD,F_NOCACHE]
 664  *              EBADF
 665  *      vnode_getwithref:???
 666  * [???]
 667  *
 668  * Implicit returns:
 669  *              *retval (modified)              fcntl return value (if any)
 670  */
 671 int
 672 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 673 {
 674         int fd = uap->fd;
 675         struct filedesc *fdp = p->p_fd;
 676         struct fileproc *fp;
 677         char *pop;
 678         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
 679         int i, tmp, error, error2, flg = F_POSIX;
 680         struct flock fl;
 681         struct vfs_context context;
 682         off_t offset;
 683         int newmin;
 684         daddr64_t lbn, bn;
 685         unsigned int fflag;
 686         user_addr_t argp;
 687         boolean_t is64bit;
 688
 689         AUDIT_ARG(fd, uap->fd);
 690         AUDIT_ARG(cmd, uap->cmd);
 691
 692         proc_fdlock(p);
 693         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 694                 proc_fdunlock(p);
 695                 return(error);
 696         }
 697         context.vc_thread = current_thread();
 698         context.vc_ucred = fp->f_cred;
 699
 700         is64bit = proc_is64bit(p);
 701         if (is64bit) {
 702                 argp = uap->arg;
 703         }
 704         else {
 705                 /*
 706                  * Since the arg parameter is defined as a long but may be
 707                  * either a long or a pointer we must take care to handle
 708                  * sign extension issues.  Our sys call munger will sign
 709                  * extend a long when we are called from a 32-bit process.
 710                  * Since we can never have an address greater than 32-bits
 711                  * from a 32-bit process we lop off the top 32-bits to avoid
 712                  * getting the wrong address
 713                  */
 714                 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
 715         }
 716
 717         pop = &fdp->fd_ofileflags[fd];
 718
 719 #if CONFIG_MACF
 720         error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
 721             uap->arg);
 722         if (error)
 723                 goto out;
 724 #endif
 725
 726         switch (uap->cmd) {
 727
 728         case F_DUPFD:
 729         case F_DUPFD_CLOEXEC:
 730                 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 731                 AUDIT_ARG(value32, newmin);
 732                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 733                     newmin >= maxfiles) {
 734                         error = EINVAL;
 735                         goto out;
 736                 }
 737                 if ( (error = fdalloc(p, newmin, &i)) )
 738                         goto out;
 739                 error = finishdup(p, fdp, fd, i,
 740                     uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
 741                 goto out;
 742
 743         case F_GETFD:
 744                 *retval = (*pop & UF_EXCLOSE)? 1 : 0;
 745                 error = 0;
 746                 goto out;
 747
 748         case F_SETFD:
 749                 AUDIT_ARG(value32, uap->arg);
 750                 *pop = (*pop &~ UF_EXCLOSE) |
 751                         (uap->arg & 1)? UF_EXCLOSE : 0;
 752                 error = 0;
 753                 goto out;
 754
 755         case F_GETFL:
 756                 *retval = OFLAGS(fp->f_flag);
 757                 error = 0;
 758                 goto out;
 759
 760         case F_SETFL:
 761                 fp->f_flag &= ~FCNTLFLAGS;
 762                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 763                 AUDIT_ARG(value32, tmp);
 764                 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
 765                 tmp = fp->f_flag & FNONBLOCK;
 766                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 767                 if (error)
 768                         goto out;
 769                 tmp = fp->f_flag & FASYNC;
 770                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
 771                 if (!error)
 772                         goto out;
 773                 fp->f_flag &= ~FNONBLOCK;
 774                 tmp = 0;
 775                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 776                 goto out;
 777
 778         case F_GETOWN:
 779                 if (fp->f_type == DTYPE_SOCKET) {
 780                         *retval = ((struct socket *)fp->f_data)->so_pgid;
 781                         error = 0;
 782                         goto out;
 783                 }
 784                 error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
 785                 *retval = -*retval;
 786                 goto out;
 787
 788         case F_SETOWN:
 789                 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
 790                 AUDIT_ARG(value32, tmp);
 791                 if (fp->f_type == DTYPE_SOCKET) {
 792                         ((struct socket *)fp->f_data)->so_pgid = tmp;
 793                         error =0;
 794                         goto out;
 795                 }
 796                 if (fp->f_type == DTYPE_PIPE) {
 797                         error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 798                         goto out;
 799                 }
 800
 801                 if (tmp <= 0) {
 802                         tmp = -tmp;
 803                 } else {
 804                         proc_t p1 = proc_find(tmp);
 805                         if (p1 == 0) {
 806                                 error = ESRCH;
 807                                 goto out;
 808                         }
 809                         tmp = (int)p1->p_pgrpid;
 810                         proc_rele(p1);
 811                 }
 812                 error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
 813                 goto out;
 814
 815         case F_SETNOSIGPIPE:
 816                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
 817                 if (fp->f_type == DTYPE_SOCKET) {
 818                         error = sock_setsockopt((struct socket *)fp->f_data,
 819                             SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
 820                 } else {
 821                         struct fileglob *fg = fp->f_fglob;
 822
 823                         lck_mtx_lock_spin(&fg->fg_lock);
 824                         if (tmp)
 825                                 fg->fg_lflags |= FG_NOSIGPIPE;
 826                         else
 827                                 fg->fg_lflags &= FG_NOSIGPIPE;
 828                         lck_mtx_unlock(&fg->fg_lock);
 829                         error = 0;
 830                 }
 831                 goto out;
 832
 833         case F_GETNOSIGPIPE:
 834                 if (fp->f_type == DTYPE_SOCKET) {
 835                         int retsize = sizeof (*retval);
 836                         error = sock_getsockopt((struct socket *)fp->f_data,
 837                             SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
 838                 } else {
 839                         *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
 840                                 1 : 0;
 841                         error = 0;
 842                 }
 843                 goto out;
 844
 845         case F_SETLKW:
 846                 flg |= F_WAIT;
 847                 /* Fall into F_SETLK */
 848
 849         case F_SETLK:
 850                 if (fp->f_type != DTYPE_VNODE) {
 851                         error = EBADF;
 852                         goto out;
 853                 }
 854                 vp = (struct vnode *)fp->f_data;
 855
 856                 fflag = fp->f_flag;
 857                 offset = fp->f_offset;
 858                 proc_fdunlock(p);
 859
 860                 /* Copy in the lock structure */
 861                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
 862                 if (error) {
 863                         goto outdrop;
 864                 }
 865
 866                 volatile off_t affected_lock_area_set = 0;
 867                 affected_lock_area_set = fl.l_start + offset;
 868                 if ((fl.l_whence == SEEK_CUR) && (affected_lock_area_set < fl.l_start)) {
 869                     error = EOVERFLOW;
 870                     goto outdrop;
 871                 }
 872
 873                 if ( (error = vnode_getwithref(vp)) ) {
 874                         goto outdrop;
 875                 }
 876                 if (fl.l_whence == SEEK_CUR)
 877                         fl.l_start += offset;
 878
 879 #if CONFIG_MACF
 880                 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
 881                     F_SETLK, &fl);
 882                 if (error) {
 883                         (void)vnode_put(vp);
 884                         goto outdrop;
 885                 }
 886 #endif
 887                 switch (fl.l_type) {
 888
 889                 case F_RDLCK:
 890                         if ((fflag & FREAD) == 0) {
 891                                 (void)vnode_put(vp);
 892                                 error = EBADF;
 893                                 goto outdrop;
 894                         }
 895                         // XXX UInt32 unsafe for LP64 kernel
 896                         OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
 897                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
 898                         (void)vnode_put(vp);
 899                         goto outdrop;
 900
 901                 case F_WRLCK:
 902                         if ((fflag & FWRITE) == 0) {
 903                                 (void)vnode_put(vp);
 904                                 error = EBADF;
 905                                 goto outdrop;
 906                         }
 907                         // XXX UInt32 unsafe for LP64 kernel
 908                         OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
 909                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context);
 910                         (void)vnode_put(vp);
 911                         goto outdrop;
 912
 913                 case F_UNLCK:
 914                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
 915                                 F_POSIX, &context);
 916                         (void)vnode_put(vp);
 917                         goto outdrop;
 918
 919                 default:
 920                         (void)vnode_put(vp);
 921                         error = EINVAL;
 922                         goto outdrop;
 923                 }
 924
 925         case F_GETLK:
 926 #if CONFIG_EMBEDDED
 927         case F_GETLKPID:
 928 #endif
 929                 if (fp->f_type != DTYPE_VNODE) {
 930                         error = EBADF;
 931                         goto out;
 932                 }
 933                 vp = (struct vnode *)fp->f_data;
 934
 935                 offset = fp->f_offset;
 936                 proc_fdunlock(p);
 937
 938                 /* Copy in the lock structure */
 939                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
 940                 if (error)
 941                         goto outdrop;
 942
 943                 volatile off_t affected_lock_area_end = 0;
 944                 affected_lock_area_end = fl.l_start + offset;
 945                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
 946                 /* and ending byte for EOVERFLOW in SEEK_SET */
 947                 if (((fl.l_whence == SEEK_CUR) &&
 948                      ((affected_lock_area_end < fl.l_start) ||
 949                       ((fl.l_len > 0) && (affected_lock_area_end + fl.l_len - 1 < affected_lock_area_end)))) ||
 950                     ((fl.l_whence == SEEK_SET) && (fl.l_len > 0) && (fl.l_start + fl.l_len - 1 < fl.l_start)))
 951                 {
 952                         /* lf_advlock doesn't check start/end for F_GETLK if file has no locks */
 953                         error = EOVERFLOW;
 954                         goto outdrop;
 955                 }
 956
 957                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
 958                         error = EINVAL;
 959                         goto outdrop;
 960                 }
 961
 962                 switch (fl.l_type) {
 963                 case F_RDLCK:
 964                 case F_UNLCK:
 965                 case F_WRLCK:
 966                         break;
 967                 default:
 968                         error = EINVAL;
 969                         goto outdrop;
 970                 }
 971
 972                 switch (fl.l_whence) {
 973                 case SEEK_CUR:
 974                 case SEEK_SET:
 975                 case SEEK_END:
 976                         break;
 977                 default:
 978                         error = EINVAL;
 979                         goto outdrop;
 980                 }
 981
 982                 if ( (error = vnode_getwithref(vp)) == 0 ) {
 983                         if (fl.l_whence == SEEK_CUR)
 984                                 fl.l_start += offset;
 985
 986 #if CONFIG_MACF
 987                         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
 988                             uap->cmd, &fl);
 989                         if (error == 0)
 990 #endif
 991                         error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context);
 992
 993                         (void)vnode_put(vp);
 994
 995                         if (error == 0)
 996                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
 997                 }
 998                 goto outdrop;
 999
1000         case F_PREALLOCATE: {
1001                 fstore_t alloc_struct;    /* structure for allocate command */
1002                 u_int32_t alloc_flags = 0;
1003
1004                 if (fp->f_type != DTYPE_VNODE) {
1005                         error = EBADF;
1006                         goto out;
1007                 }
1008
1009                 vp = (struct vnode *)fp->f_data;
1010                 proc_fdunlock(p);
1011
1012                 /* make sure that we have write permission */
1013                 if ((fp->f_flag & FWRITE) == 0) {
1014                         error = EBADF;
1015                         goto outdrop;
1016                 }
1017
1018                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1019                 if (error)
1020                         goto outdrop;
1021
1022                 /* now set the space allocated to 0 */
1023                 alloc_struct.fst_bytesalloc = 0;
1024
1025                 /*
1026                  * Do some simple parameter checking
1027                  */
1028
1029                 /* set up the flags */
1030
1031                 alloc_flags |= PREALLOCATE;
1032
1033                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
1034                         alloc_flags |= ALLOCATECONTIG;
1035
1036                 if (alloc_struct.fst_flags & F_ALLOCATEALL)
1037                         alloc_flags |= ALLOCATEALL;
1038
1039                 /*
1040                  * Do any position mode specific stuff.  The position
1041                  * modes supported now are F_PEOFPOSMODE and F_VOLPOSMODE.
1042                  */
1043
1044                 switch (alloc_struct.fst_posmode) {
1045
1046                 case F_PEOFPOSMODE:
1047                         if (alloc_struct.fst_offset != 0) {
1048                                 error = EINVAL;
1049                                 goto outdrop;
1050                         }
1051
1052                         alloc_flags |= ALLOCATEFROMPEOF;
1053                         break;
1054
1055                 case F_VOLPOSMODE:
1056                         if (alloc_struct.fst_offset <= 0) {
1057                                 error = EINVAL;
1058                                 goto outdrop;
1059                         }
1060
1061                         alloc_flags |= ALLOCATEFROMVOL;
1062                         break;
1063
1064                 default: {
1065                         error = EINVAL;
1066                         goto outdrop;
1067                         }
1068                 }
1069                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1070                         /*
1071                          * call allocate to get the space
1072                          */
1073                         error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
1074                                               &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1075                                               &context);
1076                         (void)vnode_put(vp);
1077
1078                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1079
1080                         if (error == 0)
1081                                 error = error2;
1082                 }
1083                 goto outdrop;
1084
1085                 }
1086         case F_SETSIZE:
1087                 if (fp->f_type != DTYPE_VNODE) {
1088                         error = EBADF;
1089                         goto out;
1090                 }
1091                 vp = (struct vnode *)fp->f_data;
1092                 proc_fdunlock(p);
1093
1094                 error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
1095                 if (error)
1096                         goto outdrop;
1097                 AUDIT_ARG(value64, offset);
1098
1099                 error = vnode_getwithref(vp);
1100                 if (error)
1101                         goto outdrop;
1102
1103 #if CONFIG_MACF
1104                 error = mac_vnode_check_truncate(&context,
1105                     fp->f_fglob->fg_cred, vp);
1106                 if (error) {
1107                         (void)vnode_put(vp);
1108                         goto outdrop;
1109                 }
1110 #endif
1111                 /*
1112                  * Make sure that we are root.  Growing a file
1113                  * without zero filling the data is a security hole;
1114                  * root would have access anyway, so we'll allow it.
1115                  */
1116                 if (!is_suser()) {
1117                         error = EACCES;
1118                 } else {
1119                         /*
1120                          * set the file size
1121                          */
1122                         error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1123                             &context);
1124                 }
1125
1126                 (void)vnode_put(vp);
1127                 goto outdrop;
1128
1129         case F_RDAHEAD:
1130                 if (fp->f_type != DTYPE_VNODE) {
1131                         error = EBADF;
1132                         goto out;
1133                 }
1134                 if (uap->arg)
1135                         fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1136                 else
1137                         fp->f_fglob->fg_flag |= FNORDAHEAD;
1138
1139                 goto out;
1140
1141         case F_NOCACHE:
1142                 if (fp->f_type != DTYPE_VNODE) {
1143                         error = EBADF;
1144                         goto out;
1145                 }
1146                 if (uap->arg)
1147                         fp->f_fglob->fg_flag |= FNOCACHE;
1148                 else
1149                         fp->f_fglob->fg_flag &= ~FNOCACHE;
1150
1151                 goto out;
1152
1153         case F_NODIRECT:
1154                 if (fp->f_type != DTYPE_VNODE) {
1155                         error = EBADF;
1156                         goto out;
1157                 }
1158                 if (uap->arg)
1159                         fp->f_fglob->fg_flag |= FNODIRECT;
1160                 else
1161                         fp->f_fglob->fg_flag &= ~FNODIRECT;
1162
1163                 goto out;
1164
1165         case F_SINGLE_WRITER:
1166                 if (fp->f_type != DTYPE_VNODE) {
1167                         error = EBADF;
1168                         goto out;
1169                 }
1170                 if (uap->arg)
1171                         fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1172                 else
1173                         fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1174
1175                 goto out;
1176
1177         case F_GLOBAL_NOCACHE:
1178                 if (fp->f_type != DTYPE_VNODE) {
1179                         error = EBADF;
1180                         goto out;
1181                 }
1182                 vp = (struct vnode *)fp->f_data;
1183                 proc_fdunlock(p);
1184
1185                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1186
1187                         *retval = vnode_isnocache(vp);
1188
1189                         if (uap->arg)
1190                                 vnode_setnocache(vp);
1191                         else
1192                                 vnode_clearnocache(vp);
1193
1194                         (void)vnode_put(vp);
1195                 }
1196                 goto outdrop;
1197
1198         case F_CHECK_OPENEVT:
1199                 if (fp->f_type != DTYPE_VNODE) {
1200                         error = EBADF;
1201                         goto out;
1202                 }
1203                 vp = (struct vnode *)fp->f_data;
1204                 proc_fdunlock(p);
1205
1206                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1207
1208                         *retval = vnode_is_openevt(vp);
1209
1210                         if (uap->arg)
1211                                 vnode_set_openevt(vp);
1212                         else
1213                                 vnode_clear_openevt(vp);
1214
1215                         (void)vnode_put(vp);
1216                 }
1217                 goto outdrop;
1218
1219         case F_RDADVISE: {
1220                 struct radvisory ra_struct;
1221
1222                 if (fp->f_type != DTYPE_VNODE) {
1223                         error = EBADF;
1224                         goto out;
1225                 }
1226                 vp = (struct vnode *)fp->f_data;
1227                 proc_fdunlock(p);
1228
1229                 if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
1230                         goto outdrop;
1231                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1232                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1233
1234                         (void)vnode_put(vp);
1235                 }
1236                 goto outdrop;
1237                 }
1238
1239         case F_FLUSH_DATA:
1240
1241                 if (fp->f_type != DTYPE_VNODE) {
1242                         error = EBADF;
1243                         goto out;
1244                 }
1245                 vp = (struct vnode *)fp->f_data;
1246                 proc_fdunlock(p);
1247
1248                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1249                         error = cluster_push(vp, 0);
1250
1251                         (void)vnode_put(vp);
1252                 }
1253                 goto outdrop;
1254
1255         case F_LOG2PHYS:
1256         case F_LOG2PHYS_EXT: {
1257                 struct log2phys l2p_struct;    /* structure for allocate command */
1258                 int devBlockSize;
1259
1260                 off_t file_offset = 0;
1261                 size_t a_size = 0;
1262                 size_t run = 0;
1263
1264                 if (uap->cmd == F_LOG2PHYS_EXT) {
1265                         error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1266                         if (error)
1267                                 goto out;
1268                         file_offset = l2p_struct.l2p_devoffset;
1269                 } else {
1270                         file_offset = fp->f_offset;
1271                 }
1272                 if (fp->f_type != DTYPE_VNODE) {
1273                         error = EBADF;
1274                         goto out;
1275                 }
1276                 vp = (struct vnode *)fp->f_data;
1277                 proc_fdunlock(p);
1278                 if ( (error = vnode_getwithref(vp)) ) {
1279                         goto outdrop;
1280                 }
1281                 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1282                 if (error) {
1283                         (void)vnode_put(vp);
1284                         goto outdrop;
1285                 }
1286                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1287                 if (error) {
1288                         (void)vnode_put(vp);
1289                         goto outdrop;
1290                 }
1291                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1292                 if (uap->cmd == F_LOG2PHYS_EXT) {
1293                         a_size = l2p_struct.l2p_contigbytes;
1294                 } else {
1295                         a_size = devBlockSize;
1296                 }
1297
1298                 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1299
1300                 (void)vnode_put(vp);
1301
1302                 if (!error) {
1303                         l2p_struct.l2p_flags = 0;       /* for now */
1304                         if (uap->cmd == F_LOG2PHYS_EXT) {
1305                                 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1306                         } else {
1307                                 l2p_struct.l2p_contigbytes = 0; /* for now */
1308                         }
1309                         l2p_struct.l2p_devoffset = bn * devBlockSize;
1310                         l2p_struct.l2p_devoffset += file_offset - offset;
1311                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1312                 }
1313                 goto outdrop;
1314                 }
1315         case F_GETPATH: {
1316                 char *pathbufp;
1317                 int pathlen;
1318
1319                 if (fp->f_type != DTYPE_VNODE) {
1320                         error = EBADF;
1321                         goto out;
1322                 }
1323                 vp = (struct vnode *)fp->f_data;
1324                 proc_fdunlock(p);
1325
1326                 pathlen = MAXPATHLEN;
1327                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1328                 if (pathbufp == NULL) {
1329                         error = ENOMEM;
1330                         goto outdrop;
1331                 }
1332                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1333                         error = vn_getpath(vp, pathbufp, &pathlen);
1334                         (void)vnode_put(vp);
1335
1336                         if (error == 0)
1337                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
1338                 }
1339                 FREE(pathbufp, M_TEMP);
1340                 goto outdrop;
1341         }
1342
1343         case F_PATHPKG_CHECK: {
1344                 char *pathbufp;
1345                 size_t pathlen;
1346
1347                 if (fp->f_type != DTYPE_VNODE) {
1348                         error = EBADF;
1349                         goto out;
1350                 }
1351                 vp = (struct vnode *)fp->f_data;
1352                 proc_fdunlock(p);
1353
1354                 pathlen = MAXPATHLEN;
1355                 pathbufp = kalloc(MAXPATHLEN);
1356
1357                 if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) {
1358                         if ( (error = vnode_getwithref(vp)) == 0 ) {
1359                                 AUDIT_ARG(text, pathbufp);
1360                                 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1361
1362                                 (void)vnode_put(vp);
1363                         }
1364                 }
1365                 kfree(pathbufp, MAXPATHLEN);
1366                 goto outdrop;
1367         }
1368
1369         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
1370         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZECACHE
1371         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
1372         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
1373                 if (fp->f_type != DTYPE_VNODE) {
1374                         error = EBADF;
1375                         goto out;
1376                 }
1377                 vp = (struct vnode *)fp->f_data;
1378                 proc_fdunlock(p);
1379
1380                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1381                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1382
1383                         (void)vnode_put(vp);
1384                 }
1385                 break;
1386         }
1387
1388         /*
1389          * SPI (private) for opening a file starting from a dir fd
1390          */
1391         case F_OPENFROM: {
1392                 struct user_fopenfrom fopen;
1393                 struct vnode_attr va;
1394                 struct nameidata nd;
1395                 int cmode;
1396
1397                 /* Check if this isn't a valid file descriptor */
1398                 if ((fp->f_type != DTYPE_VNODE) ||
1399                     (fp->f_flag & FREAD) == 0) {
1400                         error = EBADF;
1401                         goto out;
1402                 }
1403                 vp = (struct vnode *)fp->f_data;
1404                 proc_fdunlock(p);
1405
1406                 if (vnode_getwithref(vp)) {
1407                         error = ENOENT;
1408                         goto outdrop;
1409                 }
1410
1411                 /* Only valid for directories */
1412                 if (vp->v_type != VDIR) {
1413                         vnode_put(vp);
1414                         error = ENOTDIR;
1415                         goto outdrop;
1416                 }
1417
1418                 /* Get flags, mode and pathname arguments. */
1419                 if (IS_64BIT_PROCESS(p)) {
1420                         error = copyin(argp, &fopen, sizeof(fopen));
1421                 } else {
1422                         struct user32_fopenfrom fopen32;
1423
1424                         error = copyin(argp, &fopen32, sizeof(fopen32));
1425                         fopen.o_flags = fopen32.o_flags;
1426                         fopen.o_mode = fopen32.o_mode;
1427                         fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1428                 }
1429                 if (error) {
1430                         vnode_put(vp);
1431                         goto outdrop;
1432                 }
1433                 AUDIT_ARG(fflags, fopen.o_flags);
1434                 AUDIT_ARG(mode, fopen.o_mode);
1435                 VATTR_INIT(&va);
1436                 /* Mask off all but regular access permissions */
1437                 cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1438                 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1439
1440                 /* Start the lookup relative to the file descriptor's vnode. */
1441                 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1442                        fopen.o_pathname, &context);
1443                 nd.ni_dvp = vp;
1444
1445                 error = open1(&context, &nd, fopen.o_flags, &va, retval);
1446
1447                 vnode_put(vp);
1448                 break;
1449         }
1450         /*
1451          * SPI (private) for unlinking a file starting from a dir fd
1452          */
1453         case F_UNLINKFROM: {
1454                 struct nameidata nd;
1455                 user_addr_t pathname;
1456
1457                 /* Check if this isn't a valid file descriptor */
1458                 if ((fp->f_type != DTYPE_VNODE) ||
1459                     (fp->f_flag & FREAD) == 0) {
1460                         error = EBADF;
1461                         goto out;
1462                 }
1463                 vp = (struct vnode *)fp->f_data;
1464                 proc_fdunlock(p);
1465
1466                 if (vnode_getwithref(vp)) {
1467                         error = ENOENT;
1468                         goto outdrop;
1469                 }
1470
1471                 /* Only valid for directories */
1472                 if (vp->v_type != VDIR) {
1473                         vnode_put(vp);
1474                         error = ENOTDIR;
1475                         goto outdrop;
1476                 }
1477
1478                 /* Get the pathname argument. */
1479                 if (IS_64BIT_PROCESS(p)) {
1480                         pathname = (user_addr_t)argp;
1481                 } else {
1482                         pathname = CAST_USER_ADDR_T(argp);
1483                 }
1484
1485                 /* Start the lookup relative to the file descriptor's vnode. */
1486                 NDINIT(&nd, DELETE, OP_UNLINK, USEDVP | AUDITVNPATH1, UIO_USERSPACE,
1487                        pathname, &context);
1488                 nd.ni_dvp = vp;
1489
1490                 error = unlink1(&context, &nd, 0);
1491
1492                 vnode_put(vp);
1493                 break;
1494
1495         }
1496
1497         case F_ADDSIGS:
1498         case F_ADDFILESIGS:
1499         {
1500                 struct user_fsignatures fs;
1501                 kern_return_t kr;
1502                 vm_offset_t kernel_blob_addr;
1503                 vm_size_t kernel_blob_size;
1504
1505                 if (fp->f_type != DTYPE_VNODE) {
1506                         error = EBADF;
1507                         goto out;
1508                 }
1509                 vp = (struct vnode *)fp->f_data;
1510                 proc_fdunlock(p);
1511                 error = vnode_getwithref(vp);
1512                 if (error)
1513                         goto outdrop;
1514
1515                 if (IS_64BIT_PROCESS(p)) {
1516                         error = copyin(argp, &fs, sizeof (fs));
1517                 } else {
1518                         struct user32_fsignatures fs32;
1519
1520                         error = copyin(argp, &fs32, sizeof (fs32));
1521                         fs.fs_file_start = fs32.fs_file_start;
1522                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1523                         fs.fs_blob_size = fs32.fs_blob_size;
1524                 }
1525
1526                 if (error) {
1527                         vnode_put(vp);
1528                         goto outdrop;
1529                 }
1530
1531                 if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start))
1532                 {
1533                         /*
1534                         if(cs_debug)
1535                                 printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name);
1536                          */
1537                         vnode_put(vp);
1538                         goto outdrop;
1539                 }
1540
1541 #define CS_MAX_BLOB_SIZE (1280ULL * 1024) /* max shared cache file XXX ? */
1542                 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1543                         error = E2BIG;
1544                         vnode_put(vp);
1545                         goto outdrop;
1546                 }
1547
1548                 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1549                 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1550                 if (kr != KERN_SUCCESS) {
1551                         error = ENOMEM;
1552                         vnode_put(vp);
1553                         goto outdrop;
1554                 }
1555
1556                 if(uap->cmd == F_ADDSIGS) {
1557                         error = copyin(fs.fs_blob_start,
1558                                        (void *) kernel_blob_addr,
1559                                        kernel_blob_size);
1560                 } else /* F_ADDFILESIGS */ {
1561                         error = vn_rdwr(UIO_READ,
1562                                         vp,
1563                                         (caddr_t) kernel_blob_addr,
1564                                         kernel_blob_size,
1565                                          fs.fs_file_start + fs.fs_blob_start,
1566                                         UIO_SYSSPACE,
1567                                         0,
1568                                         kauth_cred_get(),
1569                                         0,
1570                                         p);
1571                 }
1572
1573                 if (error) {
1574                         ubc_cs_blob_deallocate(kernel_blob_addr,
1575                                                kernel_blob_size);
1576                         vnode_put(vp);
1577                         goto outdrop;
1578                 }
1579
1580                 error = ubc_cs_blob_add(
1581                         vp,
1582                         CPU_TYPE_ANY,   /* not for a specific architecture */
1583                         fs.fs_file_start,
1584                         kernel_blob_addr,
1585                         kernel_blob_size);
1586                 if (error) {
1587                         ubc_cs_blob_deallocate(kernel_blob_addr,
1588                                                kernel_blob_size);
1589                 } else {
1590                         /* ubc_cs_blob_add() has consumed "kernel_blob_addr" */
1591 #if CHECK_CS_VALIDATION_BITMAP
1592                         ubc_cs_validation_bitmap_allocate( vp );
1593 #endif
1594                 }
1595
1596                 (void) vnode_put(vp);
1597                 break;
1598         }
1599
1600         case F_MARKDEPENDENCY: {
1601                 struct vnode_attr va;
1602                 vfs_context_t ctx = vfs_context_current();
1603                 kauth_cred_t cred;
1604
1605                 if ((current_proc()->p_flag & P_DEPENDENCY_CAPABLE) == 0) {
1606                     error = EPERM;
1607                     goto out;
1608                 }
1609
1610                 if (fp->f_type != DTYPE_VNODE) {
1611                         error = EBADF;
1612                         goto out;
1613                 }
1614
1615                 vp = (struct vnode *)fp->f_data;
1616                 proc_fdunlock(p);
1617
1618                 if (vnode_getwithref(vp)) {
1619                         error = ENOENT;
1620                         goto outdrop;
1621                 }
1622
1623                 if (!vnode_isvroot(vp)) {
1624                     error = EINVAL;
1625                     vnode_put(vp);
1626                     goto outdrop;
1627                 }
1628
1629                 // get the owner of the root dir
1630                 VATTR_INIT(&va);
1631                 VATTR_WANTED(&va, va_uid);
1632                 if (vnode_getattr(vp, &va, ctx) != 0) {
1633                     error = EINVAL;
1634                     vnode_put(vp);
1635                     goto outdrop;
1636                 }
1637
1638                 // and last, check that the caller is the super user or
1639                 // the owner of the mount point
1640                 cred = vfs_context_ucred(ctx);
1641                 if (!is_suser() && va.va_uid != kauth_cred_getuid(cred)) {
1642                         error = EACCES;
1643                         vnode_put(vp);
1644                         goto outdrop;
1645                 }
1646
1647                 // if all those checks pass then we can mark the dependency
1648                 vfs_markdependency(vp->v_mount);
1649                 error = 0;
1650
1651                 vnode_put(vp);
1652
1653                 break;
1654         }
1655
1656 #if CONFIG_PROTECT
1657         case F_GETPROTECTIONCLASS: {
1658                 int class = 0;
1659
1660                 if (fp->f_type != DTYPE_VNODE) {
1661                         error = EBADF;
1662                         goto out;
1663                 }
1664                 vp = (struct vnode *)fp->f_data;
1665
1666                 proc_fdunlock(p);
1667
1668                 if (vnode_getwithref(vp)) {
1669                         error = ENOENT;
1670                         goto outdrop;
1671                 }
1672
1673                 error = cp_vnode_getclass (vp, &class);
1674                 if (error == 0) {
1675                         *retval = class;
1676                 }
1677
1678                 vnode_put(vp);
1679                 break;
1680         }
1681
1682         case F_SETPROTECTIONCLASS: {
1683                 /* tmp must be a valid PROTECTION_CLASS_* */
1684                 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
1685
1686                 if (fp->f_type != DTYPE_VNODE) {
1687                         error = EBADF;
1688                         goto out;
1689                 }
1690                 vp = (struct vnode *)fp->f_data;
1691
1692                 proc_fdunlock(p);
1693
1694                 if (vnode_getwithref(vp)) {
1695                         error = ENOENT;
1696                         goto outdrop;
1697                 }
1698
1699                 /* Only go forward if you have write access */
1700                 vfs_context_t ctx = vfs_context_current();
1701                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1702                         vnode_put(vp);
1703                         error = EBADF;
1704                         goto outdrop;
1705                 }
1706                 error = cp_vnode_setclass (vp, tmp);
1707                 vnode_put(vp);
1708                 break;
1709         }
1710
1711         case F_TRANSCODEKEY: {
1712
1713                 if (fp->f_type != DTYPE_VNODE) {
1714                         error = EBADF;
1715                         goto out;
1716                 }
1717
1718                 vp = (struct vnode *)fp->f_data;
1719                 proc_fdunlock(p);
1720
1721                 if (vnode_getwithref(vp)) {
1722                         error = ENOENT;
1723                         goto outdrop;
1724                 }
1725
1726                 error = cp_vnode_transcode (vp);
1727                 vnode_put(vp);
1728                 break;
1729         }
1730
1731         case F_GETPROTECTIONLEVEL:  {
1732                 uint32_t cp_version = 0;
1733
1734                 if (fp->f_type != DTYPE_VNODE) {
1735                         error = EBADF;
1736                         goto out;
1737                 }
1738
1739                 vp = (struct vnode*) fp->f_data;
1740                 proc_fdunlock (p);
1741
1742                 if (vnode_getwithref(vp)) {
1743                         error = ENOENT;
1744                         goto outdrop;
1745                 }
1746
1747                 /*
1748                  * if cp_get_major_vers fails, error will be set to proper errno
1749                  * and cp_version will still be 0.
1750                  */
1751
1752                 error = cp_get_root_major_vers (vp, &cp_version);
1753                 *retval = cp_version;
1754
1755                 vnode_put (vp);
1756                 break;
1757         }
1758
1759 #endif /* CONFIG_PROTECT */
1760
1761         case F_MOVEDATAEXTENTS: {
1762                 struct fileproc *fp2 = NULL;
1763                 struct vnode *src_vp = NULLVP;
1764                 struct vnode *dst_vp = NULLVP;
1765                 /* We need to grab the 2nd FD out of the arguments before moving on. */
1766                 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
1767
1768                 if (fp->f_type != DTYPE_VNODE) {
1769                         error = EBADF;
1770                         goto out;
1771                 }
1772
1773                 /* For now, special case HFS+ only, since this is SPI. */
1774                 src_vp = (struct vnode *)fp->f_data;
1775                 if (src_vp->v_tag != VT_HFS) {
1776                         error = EINVAL;
1777                         goto out;
1778                 }
1779
1780                 /*
1781                  * Get the references before we start acquiring iocounts on the vnodes,
1782                  * while we still hold the proc fd lock
1783                  */
1784                 if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
1785                         error = EBADF;
1786                         goto out;
1787                 }
1788                 if (fp2->f_type != DTYPE_VNODE) {
1789                         fp_drop(p, fd2, fp2, 1);
1790                         error = EBADF;
1791                         goto out;
1792                 }
1793                 dst_vp = (struct vnode *)fp2->f_data;
1794                 if (dst_vp->v_tag != VT_HFS) {
1795                         fp_drop(p, fd2, fp2, 1);
1796                         error = EINVAL;
1797                         goto out;
1798                 }
1799
1800 #if CONFIG_MACF
1801                 /* Re-do MAC checks against the new FD, pass in a fake argument */
1802                 error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
1803                 if (error) {
1804                         fp_drop(p, fd2, fp2, 1);
1805                         goto out;
1806                 }
1807 #endif
1808                 /* Audit the 2nd FD */
1809                 AUDIT_ARG(fd, fd2);
1810
1811                 proc_fdunlock(p);
1812
1813                 if (vnode_getwithref(src_vp)) {
1814                         fp_drop(p, fd2, fp2, 0);
1815                         error = ENOENT;
1816                         goto outdrop;
1817                 }
1818                 if (vnode_getwithref(dst_vp)) {
1819                         vnode_put (src_vp);
1820                         fp_drop(p, fd2, fp2, 0);
1821                         error = ENOENT;
1822                         goto outdrop;
1823                 }
1824
1825                 /*
1826                  * Basic asserts; validate they are not the same and that
1827                  * both live on the same filesystem.
1828                  */
1829                 if (dst_vp == src_vp) {
1830                         vnode_put (src_vp);
1831                         vnode_put (dst_vp);
1832                         fp_drop (p, fd2, fp2, 0);
1833                         error = EINVAL;
1834                         goto outdrop;
1835                 }
1836
1837                 if (dst_vp->v_mount != src_vp->v_mount) {
1838                         vnode_put (src_vp);
1839                         vnode_put (dst_vp);
1840                         fp_drop (p, fd2, fp2, 0);
1841                         error = EXDEV;
1842                         goto outdrop;
1843                 }
1844
1845                 /* Now we have a legit pair of FDs.  Go to work */
1846
1847                 /* Now check for write access to the target files */
1848                 if(vnode_authorize(src_vp, NULLVP,
1849                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
1850                         vnode_put(src_vp);
1851                         vnode_put(dst_vp);
1852                         fp_drop(p, fd2, fp2, 0);
1853                         error = EBADF;
1854                         goto outdrop;
1855                 }
1856
1857                 if(vnode_authorize(dst_vp, NULLVP,
1858                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
1859                         vnode_put(src_vp);
1860                         vnode_put(dst_vp);
1861                         fp_drop(p, fd2, fp2, 0);
1862                         error = EBADF;
1863                         goto outdrop;
1864                 }
1865
1866                 /* Verify that both vps point to files and not directories */
1867                 if ( !vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
1868                         error = EINVAL;
1869                         vnode_put (src_vp);
1870                         vnode_put (dst_vp);
1871                         fp_drop (p, fd2, fp2, 0);
1872                         goto outdrop;
1873                 }
1874
1875                 /*
1876                  * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
1877                  * We'll pass in our special bit indicating that the new behavior is expected
1878                  */
1879
1880                 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
1881
1882                 vnode_put (src_vp);
1883                 vnode_put (dst_vp);
1884                 fp_drop(p, fd2, fp2, 0);
1885                 break;
1886         }
1887
1888
1889         /*
1890          * SPI (private) for indicating to a filesystem that subsequent writes to
1891          * the open FD will represent static content.
1892          */
1893         case F_SETSTATICCONTENT: {
1894                 caddr_t ioctl_arg = NULL;
1895
1896                 if (uap->arg) {
1897                         ioctl_arg = (caddr_t) 1;
1898                 }
1899
1900                 if (fp->f_type != DTYPE_VNODE) {
1901                         error = EBADF;
1902                         goto out;
1903                 }
1904                 vp = (struct vnode *)fp->f_data;
1905                 proc_fdunlock(p);
1906
1907                 error = vnode_getwithref(vp);
1908                 if (error) {
1909                         error = ENOENT;
1910                         goto outdrop;
1911                 }
1912
1913                 /* Only go forward if you have write access */
1914                 vfs_context_t ctx = vfs_context_current();
1915                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1916                         vnode_put(vp);
1917                         error = EBADF;
1918                         goto outdrop;
1919                 }
1920
1921                 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
1922                 (void)vnode_put(vp);
1923
1924                 break;
1925         }
1926
1927         /*
1928          * Set the vnode pointed to by 'fd'
1929          * and tag it as the (potentially future) backing store
1930          * for another filesystem
1931          */
1932         case F_SETBACKINGSTORE: {
1933                 if (fp->f_type != DTYPE_VNODE) {
1934                         error = EBADF;
1935                         goto out;
1936                 }
1937
1938                 vp = (struct vnode *)fp->f_data;
1939
1940                 if (vp->v_tag != VT_HFS) {
1941                         error = EINVAL;
1942                         goto out;
1943                 }
1944                 proc_fdunlock(p);
1945
1946                 if (vnode_getwithref(vp)) {
1947                         error = ENOENT;
1948                         goto outdrop;
1949                 }
1950
1951                 /* only proceed if you have write access */
1952                 vfs_context_t ctx = vfs_context_current();
1953                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1954                         vnode_put(vp);
1955                         error = EBADF;
1956                         goto outdrop;
1957                 }
1958
1959
1960                 /* If arg != 0, set, otherwise unset */
1961                 if (uap->arg) {
1962                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)1, 0, &context);
1963                 }
1964                 else {
1965                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, 0, &context);
1966                 }
1967
1968                 vnode_put(vp);
1969                 break;
1970         }
1971
1972         /*
1973          * like F_GETPATH, but special semantics for
1974          * the mobile time machine handler.
1975          */
1976         case F_GETPATH_MTMINFO: {
1977                 char *pathbufp;
1978                 int pathlen;
1979
1980                 if (fp->f_type != DTYPE_VNODE) {
1981                         error = EBADF;
1982                         goto out;
1983                 }
1984                 vp = (struct vnode *)fp->f_data;
1985                 proc_fdunlock(p);
1986
1987                 pathlen = MAXPATHLEN;
1988                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1989                 if (pathbufp == NULL) {
1990                         error = ENOMEM;
1991                         goto outdrop;
1992                 }
1993                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1994                         int backingstore = 0;
1995
1996                         /* Check for error from vn_getpath before moving on */
1997                         if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
1998                                 if (vp->v_tag == VT_HFS) {
1999                                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2000                                 }
2001                                 (void)vnode_put(vp);
2002
2003                                 if (error == 0) {
2004                                         error = copyout((caddr_t)pathbufp, argp, pathlen);
2005                                 }
2006                                 if (error == 0) {
2007                                         /*
2008                                          * If the copyout was successful, now check to ensure
2009                                          * that this vnode is not a BACKINGSTORE vnode.  mtmd
2010                                          * wants the path regardless.
2011                                          */
2012                                         if (backingstore) {
2013                                                 error = EBUSY;
2014                                         }
2015                                 }
2016                         } else
2017                                 (void)vnode_put(vp);
2018                 }
2019                 FREE(pathbufp, M_TEMP);
2020                 goto outdrop;
2021         }
2022
2023         default:
2024                 /*
2025                  * This is an fcntl() that we do not recognize at this level;
2026                  * if this is a vnode, we send it down into the VNOP_IOCTL
2027                  * for this vnode; this can include special devices, and will
2028                  * effectively overload fcntl() to send ioctl()'s.
2029                  */
2030                 if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){
2031                         error = EINVAL;
2032                         goto out;
2033                 }
2034
2035                 if (fp->f_type != DTYPE_VNODE) {
2036                         error = EBADF;
2037                         goto out;
2038                 }
2039                 vp = (struct vnode *)fp->f_data;
2040                 proc_fdunlock(p);
2041
2042                 if ( (error = vnode_getwithref(vp)) == 0 ) {
2043 #define STK_PARAMS 128
2044                         char stkbuf[STK_PARAMS];
2045                         unsigned int size;
2046                         caddr_t data, memp;
2047                         /*
2048                          * For this to work properly, we have to copy in the
2049                          * ioctl() cmd argument if there is one; we must also
2050                          * check that a command parameter, if present, does
2051                          * not exceed the maximum command length dictated by
2052                          * the number of bits we have available in the command
2053                          * to represent a structure length.  Finally, we have
2054                          * to copy the results back out, if it is that type of
2055                          * ioctl().
2056                          */
2057                         size = IOCPARM_LEN(uap->cmd);
2058                         if (size > IOCPARM_MAX) {
2059                                 (void)vnode_put(vp);
2060                                 error = EINVAL;
2061                                 break;
2062                         }
2063
2064                         memp = NULL;
2065                         if (size > sizeof (stkbuf)) {
2066                                 if ((memp = (caddr_t)kalloc(size)) == 0) {
2067                                         (void)vnode_put(vp);
2068                                         error = ENOMEM;
2069                                         goto outdrop;
2070                                 }
2071                                 data = memp;
2072                         } else {
2073                                 data = &stkbuf[0];
2074                         }
2075
2076                         if (uap->cmd & IOC_IN) {
2077                                 if (size) {
2078                                         /* structure */
2079                                         error = copyin(argp, data, size);
2080                                         if (error) {
2081                                                 (void)vnode_put(vp);
2082                                                 if (memp)
2083                                                         kfree(memp, size);
2084                                                 goto outdrop;
2085                                         }
2086                                 } else {
2087                                         /* int */
2088                                         if (is64bit) {
2089                                                 *(user_addr_t *)data = argp;
2090                                         } else {
2091                                                 *(uint32_t *)data = (uint32_t)argp;
2092                                         }
2093                                 };
2094                         } else if ((uap->cmd & IOC_OUT) && size) {
2095                                 /*
2096                                  * Zero the buffer so the user always
2097                                  * gets back something deterministic.
2098                                  */
2099                                 bzero(data, size);
2100                         } else if (uap->cmd & IOC_VOID) {
2101                                 if (is64bit) {
2102                                     *(user_addr_t *)data = argp;
2103                                 } else {
2104                                     *(uint32_t *)data = (uint32_t)argp;
2105                                 }
2106                         }
2107
2108                         error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2109
2110                         (void)vnode_put(vp);
2111
2112                         /* Copy any output data to user */
2113                         if (error == 0 && (uap->cmd & IOC_OUT) && size)
2114                                 error = copyout(data, argp, size);
2115                         if (memp)
2116                                 kfree(memp, size);
2117                 }
2118                 break;
2119         }
2120
2121 outdrop:
2122         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2123         fp_drop(p, fd, fp, 0);
2124         return(error);
2125 out:
2126         fp_drop(p, fd, fp, 1);
2127         proc_fdunlock(p);
2128         return(error);
2129 }
2130
2131
2132 /*
2133  * finishdup
2134  *
2135  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2136  *
2137  * Parameters:  p                               Process performing the dup
2138  *              old                             The fd to dup
2139  *              new                             The fd to dup it to
2140  *              fd_flags                        Flags to augment the new fd
2141  *              retval                          Pointer to the call return area
2142  *
2143  * Returns:     0                               Success
2144  *              EBADF
2145  *              ENOMEM
2146  *
2147  * Implicit returns:
2148  *              *retval (modified)              The new descriptor
2149  *
2150  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
2151  *              the caller
2152  *
2153  * Notes:       This function may drop and reacquire this lock; it is unsafe
2154  *              for a caller to assume that other state protected by the lock
2155  *              has not been subsequently changed out from under it.
2156  */
2157 int
2158 finishdup(proc_t p,
2159     struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2160 {
2161         struct fileproc *nfp;
2162         struct fileproc *ofp;
2163 #if CONFIG_MACF
2164         int error;
2165 #endif
2166
2167 #if DIAGNOSTIC
2168         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2169 #endif
2170         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2171             (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2172                 fdrelse(p, new);
2173                 return (EBADF);
2174         }
2175         fg_ref(ofp);
2176
2177 #if CONFIG_MACF
2178         error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2179         if (error) {
2180                 fg_drop(ofp);
2181                 fdrelse(p, new);
2182                 return (error);
2183         }
2184 #endif
2185
2186         proc_fdunlock(p);
2187
2188         MALLOC_ZONE(nfp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
2189         /* Failure check follows proc_fdlock() due to handling requirements */
2190
2191         proc_fdlock(p);
2192
2193         if (nfp == NULL) {
2194                 fg_drop(ofp);
2195                 fdrelse(p, new);
2196                 return (ENOMEM);
2197         }
2198
2199         bzero(nfp, sizeof(struct fileproc));
2200
2201         nfp->f_flags = 0;
2202         nfp->f_fglob = ofp->f_fglob;
2203         nfp->f_iocount = 0;
2204
2205 #if DIAGNOSTIC
2206         if (fdp->fd_ofiles[new] != 0)
2207                 panic("finishdup: overwriting fd_ofiles with new %d", new);
2208         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
2209                 panic("finishdup: unreserved fileflags with new %d", new);
2210 #endif
2211
2212         if (new > fdp->fd_lastfile)
2213                 fdp->fd_lastfile = new;
2214         *fdflags(p, new) |= fd_flags;
2215         procfdtbl_releasefd(p, new, nfp);
2216         *retval = new;
2217         return (0);
2218 }
2219
2220
2221 /*
2222  * close
2223  *
2224  * Description: The implementation of the close(2) system call
2225  *
2226  * Parameters:  p                       Process in whose per process file table
2227  *                                      the close is to occur
2228  *              uap->fd                 fd to be closed
2229  *              retval                  <unused>
2230  *
2231  * Returns:     0                       Success
2232  *      fp_lookup:EBADF                 Bad file descriptor
2233  *      close_internal:EBADF
2234  *      close_internal:???              Anything returnable by a per-fileops
2235  *                                      close function
2236  */
2237 int
2238 close(proc_t p, struct close_args *uap, int32_t *retval)
2239 {
2240         __pthread_testcancel(1);
2241         return(close_nocancel(p, (struct close_nocancel_args *)uap, retval));
2242 }
2243
2244
2245 int
2246 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2247 {
2248         struct fileproc *fp;
2249         int fd = uap->fd;
2250         int error =0;
2251
2252         AUDIT_SYSCLOSE(p, fd);
2253
2254         proc_fdlock(p);
2255
2256         if ( (error = fp_lookup(p,fd,&fp, 1)) ) {
2257                 proc_fdunlock(p);
2258                 return(error);
2259         }
2260
2261         error = close_internal_locked(p, fd, fp, 0);
2262
2263         proc_fdunlock(p);
2264
2265         return(error);
2266 }
2267
2268
2269 /*
2270  * close_internal_locked
2271  *
2272  * Close a file descriptor.
2273  *
2274  * Parameters:  p                       Process in whose per process file table
2275  *                                      the close is to occur
2276  *              fd                      fd to be closed
2277  *              fp                      fileproc associated with the fd
2278  *
2279  * Returns:     0                       Success
2280  *              EBADF                   fd already in close wait state
2281  *      closef_locked:???               Anything returnable by a per-fileops
2282  *                                      close function
2283  *
2284  * Locks:       Assumes proc_fdlock for process is held by the caller and returns
2285  *              with lock held
2286  *
2287  * Notes:       This function may drop and reacquire this lock; it is unsafe
2288  *              for a caller to assume that other state protected by the lock
2289  *              has not been subsequently changes out from under it, if the
2290  *              caller made the call with the lock held.
2291  */
2292 static int
2293 close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
2294 {
2295         struct filedesc *fdp = p->p_fd;
2296         int error =0;
2297         int resvfd = flags & FD_DUP2RESV;
2298
2299
2300 #if DIAGNOSTIC
2301         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2302 #endif
2303
2304         /* Keep people from using the filedesc while we are closing it */
2305         procfdtbl_markclosefd(p, fd);
2306
2307
2308         if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2309                 panic("close_internal_locked: being called on already closing fd");
2310         }
2311
2312
2313 #if DIAGNOSTIC
2314         if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2315                 panic("close_internal: unreserved fileflags with fd %d", fd);
2316 #endif
2317
2318         fp->f_flags |= FP_CLOSING;
2319
2320         if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) {
2321
2322                 proc_fdunlock(p);
2323
2324                 if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
2325                         /*
2326                          * call out to allow 3rd party notification of close.
2327                          * Ignore result of kauth_authorize_fileop call.
2328                          */
2329                         if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
2330                                 u_int   fileop_flags = 0;
2331                                 if ((fp->f_flags & FP_WRITTEN) != 0)
2332                                         fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
2333                                 kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2334                                                        (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2335                                 vnode_put((vnode_t)fp->f_data);
2336                         }
2337                 }
2338                 if (fp->f_flags & FP_AIOISSUED)
2339                         /*
2340                          * cancel all async IO requests that can be cancelled.
2341                          */
2342                         _aio_close( p, fd );
2343
2344                 proc_fdlock(p);
2345         }
2346
2347         if (fd < fdp->fd_knlistsize)
2348                 knote_fdclose(p, fd);
2349
2350         if (fp->f_flags & FP_WAITEVENT)
2351                 (void)waitevent_close(p, fp);
2352
2353         if ((fp->f_flags & FP_INCHRREAD) == 0)
2354                 fileproc_drain(p, fp);
2355
2356         if (resvfd == 0) {
2357                 _fdrelse(p, fd);
2358         } else {
2359                 procfdtbl_reservefd(p, fd);
2360         }
2361
2362         error = closef_locked(fp, fp->f_fglob, p);
2363         if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
2364                 wakeup(&fp->f_flags);
2365         fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
2366
2367         proc_fdunlock(p);
2368
2369         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
2370
2371         proc_fdlock(p);
2372
2373 #if DIAGNOSTIC
2374         if (resvfd != 0) {
2375                 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2376                         panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
2377         }
2378 #endif
2379
2380         return(error);
2381 }
2382
2383
2384 /*
2385  * fstat1
2386  *
2387  * Description: Return status information about a file descriptor.
2388  *
2389  * Parameters:  p                               The process doing the fstat
2390  *              fd                              The fd to stat
2391  *              ub                              The user stat buffer
2392  *              xsecurity                       The user extended security
2393  *                                              buffer, or 0 if none
2394  *              xsecurity_size                  The size of xsecurity, or 0
2395  *                                              if no xsecurity
2396  *              isstat64                        Flag to indicate 64 bit version
2397  *                                              for inode size, etc.
2398  *
2399  * Returns:     0                               Success
2400  *              EBADF
2401  *              EFAULT
2402  *      fp_lookup:EBADF                         Bad file descriptor
2403  *      vnode_getwithref:???
2404  *      copyout:EFAULT
2405  *      vnode_getwithref:???
2406  *      vn_stat:???
2407  *      soo_stat:???
2408  *      pipe_stat:???
2409  *      pshm_stat:???
2410  *      kqueue_stat:???
2411  *
2412  * Notes:       Internal implementation for all other fstat() related
2413  *              functions
2414  *
2415  *              XXX switch on node type is bogus; need a stat in struct
2416  *              XXX fileops instead.
2417  */
2418 static int
2419 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
2420 {
2421         struct fileproc *fp;
2422         union {
2423                 struct stat sb;
2424                 struct stat64 sb64;
2425         } source;
2426         union {
2427                 struct user64_stat user64_sb;
2428                 struct user32_stat user32_sb;
2429                 struct user64_stat64 user64_sb64;
2430                 struct user32_stat64 user32_sb64;
2431         } dest;
2432         int error, my_size;
2433         int funnel_state;
2434         file_type_t type;
2435         caddr_t data;
2436         kauth_filesec_t fsec;
2437         user_size_t xsecurity_bufsize;
2438         vfs_context_t ctx = vfs_context_current();
2439         void * sbptr;
2440
2441
2442         AUDIT_ARG(fd, fd);
2443
2444         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
2445                 return(error);
2446         }
2447         type = fp->f_type;
2448         data = fp->f_data;
2449         fsec = KAUTH_FILESEC_NONE;
2450
2451         sbptr = (void *)&source;
2452
2453         switch (type) {
2454
2455         case DTYPE_VNODE:
2456                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
2457                         /*
2458                          * If the caller has the file open, and is not
2459                          * requesting extended security information, we are
2460                          * going to let them get the basic stat information.
2461                          */
2462                         if (xsecurity == USER_ADDR_NULL) {
2463                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx);
2464                         } else {
2465                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
2466                         }
2467
2468                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
2469                         (void)vnode_put((vnode_t)data);
2470                 }
2471                 break;
2472
2473 #if SOCKETS
2474         case DTYPE_SOCKET:
2475                 error = soo_stat((struct socket *)data, sbptr, isstat64);
2476                 break;
2477 #endif /* SOCKETS */
2478
2479         case DTYPE_PIPE:
2480                 error = pipe_stat((void *)data, sbptr, isstat64);
2481                 break;
2482
2483         case DTYPE_PSXSHM:
2484                 error = pshm_stat((void *)data, sbptr, isstat64);
2485                 break;
2486
2487         case DTYPE_KQUEUE:
2488                 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2489                 error = kqueue_stat(fp, sbptr, isstat64, p);
2490                 thread_funnel_set(kernel_flock, funnel_state);
2491                 break;
2492
2493         default:
2494                 error = EBADF;
2495                 goto out;
2496         }
2497         if (error == 0) {
2498                 caddr_t sbp;
2499
2500                 if (isstat64 != 0) {
2501                         source.sb64.st_lspare = 0;
2502                         source.sb64.st_qspare[0] = 0LL;
2503                         source.sb64.st_qspare[1] = 0LL;
2504
2505                         if (IS_64BIT_PROCESS(current_proc())) {
2506                                 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
2507                                 my_size = sizeof(dest.user64_sb64);
2508                                 sbp = (caddr_t)&dest.user64_sb64;
2509                         } else {
2510                                 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
2511                                 my_size = sizeof(dest.user32_sb64);
2512                                 sbp = (caddr_t)&dest.user32_sb64;
2513                         }
2514                 } else {
2515                         source.sb.st_lspare = 0;
2516                         source.sb.st_qspare[0] = 0LL;
2517                         source.sb.st_qspare[1] = 0LL;
2518                         if (IS_64BIT_PROCESS(current_proc())) {
2519                                 munge_user64_stat(&source.sb, &dest.user64_sb);
2520                                 my_size = sizeof(dest.user64_sb);
2521                                 sbp = (caddr_t)&dest.user64_sb;
2522                         } else {
2523                                 munge_user32_stat(&source.sb, &dest.user32_sb);
2524                                 my_size = sizeof(dest.user32_sb);
2525                                 sbp = (caddr_t)&dest.user32_sb;
2526                         }
2527                 }
2528
2529                 error = copyout(sbp, ub, my_size);
2530         }
2531
2532         /* caller wants extended security information? */
2533         if (xsecurity != USER_ADDR_NULL) {
2534
2535                 /* did we get any? */
2536                  if (fsec == KAUTH_FILESEC_NONE) {
2537                         if (susize(xsecurity_size, 0) != 0) {
2538                                 error = EFAULT;
2539                                 goto out;
2540                         }
2541                 } else {
2542                         /* find the user buffer size */
2543                         xsecurity_bufsize = fusize(xsecurity_size);
2544
2545                         /* copy out the actual data size */
2546                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
2547                                 error = EFAULT;
2548                                 goto out;
2549                         }
2550
2551                         /* if the caller supplied enough room, copy out to it */
2552                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
2553                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
2554                 }
2555         }
2556 out:
2557         fp_drop(p, fd, fp, 0);
2558         if (fsec != NULL)
2559                 kauth_filesec_free(fsec);
2560         return (error);
2561 }
2562
2563
2564 /*
2565  * fstat_extended
2566  *
2567  * Description: Extended version of fstat supporting returning extended
2568  *              security information
2569  *
2570  * Parameters:  p                               The process doing the fstat
2571  *              uap->fd                         The fd to stat
2572  *              uap->ub                         The user stat buffer
2573  *              uap->xsecurity                  The user extended security
2574  *                                              buffer, or 0 if none
2575  *              uap->xsecurity_size             The size of xsecurity, or 0
2576  *
2577  * Returns:     0                               Success
2578  *              !0                              Errno (see fstat1)
2579  */
2580 int
2581 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
2582 {
2583         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
2584 }
2585
2586
2587 /*
2588  * fstat
2589  *
2590  * Description: Get file status for the file associated with fd
2591  *
2592  * Parameters:  p                               The process doing the fstat
2593  *              uap->fd                         The fd to stat
2594  *              uap->ub                         The user stat buffer
2595  *
2596  * Returns:     0                               Success
2597  *              !0                              Errno (see fstat1)
2598  */
2599 int
2600 fstat(proc_t p, register struct fstat_args *uap, __unused int32_t *retval)
2601 {
2602         return(fstat1(p, uap->fd, uap->ub, 0, 0, 0));
2603 }
2604
2605
2606 /*
2607  * fstat64_extended
2608  *
2609  * Description: Extended version of fstat64 supporting returning extended
2610  *              security information
2611  *
2612  * Parameters:  p                               The process doing the fstat
2613  *              uap->fd                         The fd to stat
2614  *              uap->ub                         The user stat buffer
2615  *              uap->xsecurity                  The user extended security
2616  *                                              buffer, or 0 if none
2617  *              uap->xsecurity_size             The size of xsecurity, or 0
2618  *
2619  * Returns:     0                               Success
2620  *              !0                              Errno (see fstat1)
2621  */
2622 int
2623 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
2624 {
2625         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
2626 }
2627
2628
2629 /*
2630  * fstat64
2631  *
2632  * Description: Get 64 bit version of the file status for the file associated
2633  *              with fd
2634  *
2635  * Parameters:  p                               The process doing the fstat
2636  *              uap->fd                         The fd to stat
2637  *              uap->ub                         The user stat buffer
2638  *
2639  * Returns:     0                               Success
2640  *              !0                              Errno (see fstat1)
2641  */
2642 int
2643 fstat64(proc_t p, register struct fstat64_args *uap, __unused int32_t *retval)
2644 {
2645         return(fstat1(p, uap->fd, uap->ub, 0, 0, 1));
2646 }
2647
2648
2649 /*
2650  * fpathconf
2651  *
2652  * Description: Return pathconf information about a file descriptor.
2653  *
2654  * Parameters:  p                               Process making the request
2655  *              uap->fd                         fd to get information about
2656  *              uap->name                       Name of information desired
2657  *              retval                          Pointer to the call return area
2658  *
2659  * Returns:     0                               Success
2660  *              EINVAL
2661  *      fp_lookup:EBADF                         Bad file descriptor
2662  *      vnode_getwithref:???
2663  *      vn_pathconf:???
2664  *
2665  * Implicit returns:
2666  *              *retval (modified)              Returned information (numeric)
2667  */
2668 int
2669 fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
2670 {
2671         int fd = uap->fd;
2672         struct fileproc *fp;
2673         struct vnode *vp;
2674         int error = 0;
2675         file_type_t type;
2676         caddr_t data;
2677
2678
2679         AUDIT_ARG(fd, uap->fd);
2680         if ( (error = fp_lookup(p, fd, &fp, 0)) )
2681                 return(error);
2682         type = fp->f_type;
2683         data = fp->f_data;
2684
2685         switch (type) {
2686
2687         case DTYPE_SOCKET:
2688                 if (uap->name != _PC_PIPE_BUF) {
2689                         error = EINVAL;
2690                         goto out;
2691                 }
2692                 *retval = PIPE_BUF;
2693                 error = 0;
2694                 goto out;
2695
2696         case DTYPE_PIPE:
2697                 if (uap->name != _PC_PIPE_BUF) {
2698                         error = EINVAL;
2699                         goto out;
2700                 }
2701                 *retval = PIPE_BUF;
2702                 error = 0;
2703                 goto out;
2704
2705         case DTYPE_VNODE:
2706                 vp = (struct vnode *)data;
2707
2708                 if ( (error = vnode_getwithref(vp)) == 0) {
2709                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2710
2711                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
2712
2713                         (void)vnode_put(vp);
2714                 }
2715                 goto out;
2716
2717         case DTYPE_PSXSHM:
2718         case DTYPE_PSXSEM:
2719         case DTYPE_KQUEUE:
2720         case DTYPE_FSEVENTS:
2721                 error = EINVAL;
2722                 goto out;
2723
2724         }
2725         /*NOTREACHED*/
2726 out:
2727         fp_drop(p, fd, fp, 0);
2728         return(error);
2729 }
2730
2731 /*
2732  * Statistics counter for the number of times a process calling fdalloc()
2733  * has resulted in an expansion of the per process open file table.
2734  *
2735  * XXX This would likely be of more use if it were per process
2736  */
2737 int fdexpand;
2738
2739
2740 /*
2741  * fdalloc
2742  *
2743  * Description: Allocate a file descriptor for the process.
2744  *
2745  * Parameters:  p                               Process to allocate the fd in
2746  *              want                            The fd we would prefer to get
2747  *              result                          Pointer to fd we got
2748  *
2749  * Returns:     0                               Success
2750  *              EMFILE
2751  *              ENOMEM
2752  *
2753  * Implicit returns:
2754  *              *result (modified)              The fd which was allocated
2755  */
2756 int
2757 fdalloc(proc_t p, int want, int *result)
2758 {
2759         struct filedesc *fdp = p->p_fd;
2760         int i;
2761         int lim, last, numfiles, oldnfiles;
2762         struct fileproc **newofiles, **ofiles;
2763         char *newofileflags;
2764
2765         /*
2766          * Search for a free descriptor starting at the higher
2767          * of want or fd_freefile.  If that fails, consider
2768          * expanding the ofile array.
2769          */
2770 #if DIAGNOSTIC
2771         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2772 #endif
2773
2774         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
2775         for (;;) {
2776                 last = min(fdp->fd_nfiles, lim);
2777                 if ((i = want) < fdp->fd_freefile)
2778                         i = fdp->fd_freefile;
2779                 for (; i < last; i++) {
2780                         if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
2781                                 procfdtbl_reservefd(p, i);
2782                                 if (i > fdp->fd_lastfile)
2783                                         fdp->fd_lastfile = i;
2784                                 if (want <= fdp->fd_freefile)
2785                                         fdp->fd_freefile = i;
2786                                 *result = i;
2787                                 return (0);
2788                         }
2789                 }
2790
2791                 /*
2792                  * No space in current array.  Expand?
2793                  */
2794                 if (fdp->fd_nfiles >= lim)
2795                         return (EMFILE);
2796                 if (fdp->fd_nfiles < NDEXTENT)
2797                         numfiles = NDEXTENT;
2798                 else
2799                         numfiles = 2 * fdp->fd_nfiles;
2800                 /* Enforce lim */
2801                 if (numfiles > lim)
2802                         numfiles = lim;
2803                 proc_fdunlock(p);
2804                 MALLOC_ZONE(newofiles, struct fileproc **,
2805                                 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
2806                 proc_fdlock(p);
2807                 if (newofiles == NULL) {
2808                         return (ENOMEM);
2809                 }
2810                 if (fdp->fd_nfiles >= numfiles) {
2811                         FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
2812                         continue;
2813                 }
2814                 newofileflags = (char *) &newofiles[numfiles];
2815                 /*
2816                  * Copy the existing ofile and ofileflags arrays
2817                  * and zero the new portion of each array.
2818                  */
2819                 oldnfiles = fdp->fd_nfiles;
2820                 (void) memcpy(newofiles, fdp->fd_ofiles,
2821                                 oldnfiles * sizeof(*fdp->fd_ofiles));
2822                 (void) memset(&newofiles[oldnfiles], 0,
2823                                 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
2824
2825                 (void) memcpy(newofileflags, fdp->fd_ofileflags,
2826                                 oldnfiles * sizeof(*fdp->fd_ofileflags));
2827                 (void) memset(&newofileflags[oldnfiles], 0,
2828                                 (numfiles - oldnfiles) *
2829                                                 sizeof(*fdp->fd_ofileflags));
2830                 ofiles = fdp->fd_ofiles;
2831                 fdp->fd_ofiles = newofiles;
2832                 fdp->fd_ofileflags = newofileflags;
2833                 fdp->fd_nfiles = numfiles;
2834                 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
2835                 fdexpand++;
2836         }
2837 }
2838
2839
2840 /*
2841  * fdavail
2842  *
2843  * Description: Check to see whether n user file descriptors are available
2844  *              to the process p.
2845  *
2846  * Parameters:  p                               Process to check in
2847  *              n                               The number of fd's desired
2848  *
2849  * Returns:     0                               No
2850  *              1                               Yes
2851  *
2852  * Locks:       Assumes proc_fdlock for process is held by the caller
2853  *
2854  * Notes:       The answer only remains valid so long as the proc_fdlock is
2855  *              held by the caller.
2856  */
2857 int
2858 fdavail(proc_t p, int n)
2859 {
2860         struct filedesc *fdp = p->p_fd;
2861         struct fileproc **fpp;
2862         char *flags;
2863         int i, lim;
2864
2865         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
2866         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
2867                 return (1);
2868         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
2869         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
2870         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
2871                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
2872                         return (1);
2873         return (0);
2874 }
2875
2876
2877 /*
2878  * fdrelse
2879  *
2880  * Description: Legacy KPI wrapper function for _fdrelse
2881  *
2882  * Parameters:  p                               Process in which fd lives
2883  *              fd                              fd to free
2884  *
2885  * Returns:     void
2886  *
2887  * Locks:       Assumes proc_fdlock for process is held by the caller
2888  */
2889 void
2890 fdrelse(proc_t p, int fd)
2891 {
2892         _fdrelse(p, fd);
2893 }
2894
2895
2896 /*
2897  * fdgetf_noref
2898  *
2899  * Description: Get the fileproc pointer for the given fd from the per process
2900  *              open file table without taking an explicit reference on it.
2901  *
2902  * Parameters:  p                               Process containing fd
2903  *              fd                              fd to obtain fileproc for
2904  *              resultfp                        Pointer to pointer return area
2905  *
2906  * Returns:     0                               Success
2907  *              EBADF
2908  *
2909  * Implicit returns:
2910  *              *resultfp (modified)            Pointer to fileproc pointer
2911  *
2912  * Locks:       Assumes proc_fdlock for process is held by the caller
2913  *
2914  * Notes:       Because there is no reference explicitly taken, the returned
2915  *              fileproc pointer is only valid so long as the proc_fdlock
2916  *              remains held by the caller.
2917  */
2918 int
2919 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
2920 {
2921         struct filedesc *fdp = p->p_fd;
2922         struct fileproc *fp;
2923
2924         if (fd < 0 || fd >= fdp->fd_nfiles ||
2925                         (fp = fdp->fd_ofiles[fd]) == NULL ||
2926                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2927                 return (EBADF);
2928         }
2929         if (resultfp)
2930                 *resultfp = fp;
2931         return (0);
2932 }
2933
2934
2935 /*
2936  * fp_getfvp
2937  *
2938  * Description: Get fileproc and vnode pointer for a given fd from the per
2939  *              process open file table of the specified process, and if
2940  *              successful, increment the f_iocount
2941  *
2942  * Parameters:  p                               Process in which fd lives
2943  *              fd                              fd to get information for
2944  *              resultfp                        Pointer to result fileproc
2945  *                                              pointer area, or 0 if none
2946  *              resultvp                        Pointer to result vnode pointer
2947  *                                              area, or 0 if none
2948  *
2949  * Returns:     0                               Success
2950  *              EBADF                           Bad file descriptor
2951  *              ENOTSUP                         fd does not refer to a vnode
2952  *
2953  * Implicit returns:
2954  *              *resultfp (modified)            Fileproc pointer
2955  *              *resultvp (modified)            vnode pointer
2956  *
2957  * Notes:       The resultfp and resultvp fields are optional, and may be
2958  *              independently specified as NULL to skip returning information
2959  *
2960  * Locks:       Internally takes and releases proc_fdlock
2961  */
2962 int
2963 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
2964 {
2965         struct filedesc *fdp = p->p_fd;
2966         struct fileproc *fp;
2967
2968         proc_fdlock_spin(p);
2969         if (fd < 0 || fd >= fdp->fd_nfiles ||
2970                         (fp = fdp->fd_ofiles[fd]) == NULL ||
2971                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2972                 proc_fdunlock(p);
2973                 return (EBADF);
2974         }
2975         if (fp->f_type != DTYPE_VNODE) {
2976                 proc_fdunlock(p);
2977                 return(ENOTSUP);
2978         }
2979         fp->f_iocount++;
2980
2981         if (resultfp)
2982                 *resultfp = fp;
2983         if (resultvp)
2984                 *resultvp = (struct vnode *)fp->f_data;
2985         proc_fdunlock(p);
2986
2987         return (0);
2988 }
2989
2990
2991 /*
2992  * fp_getfvpandvid
2993  *
2994  * Description: Get fileproc, vnode pointer, and vid for a given fd from the
2995  *              per process open file table of the specified process, and if
2996  *              successful, increment the f_iocount
2997  *
2998  * Parameters:  p                               Process in which fd lives
2999  *              fd                              fd to get information for
3000  *              resultfp                        Pointer to result fileproc
3001  *                                              pointer area, or 0 if none
3002  *              resultvp                        Pointer to result vnode pointer
3003  *                                              area, or 0 if none
3004  *              vidp                            Pointer to resuld vid area
3005  *
3006  * Returns:     0                               Success
3007  *              EBADF                           Bad file descriptor
3008  *              ENOTSUP                         fd does not refer to a vnode
3009  *
3010  * Implicit returns:
3011  *              *resultfp (modified)            Fileproc pointer
3012  *              *resultvp (modified)            vnode pointer
3013  *              *vidp                           vid value
3014  *
3015  * Notes:       The resultfp and resultvp fields are optional, and may be
3016  *              independently specified as NULL to skip returning information
3017  *
3018  * Locks:       Internally takes and releases proc_fdlock
3019  */
3020 int
3021 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3022                 struct vnode **resultvp, uint32_t *vidp)
3023 {
3024         struct filedesc *fdp = p->p_fd;
3025         struct fileproc *fp;
3026
3027         proc_fdlock_spin(p);
3028         if (fd < 0 || fd >= fdp->fd_nfiles ||
3029                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3030                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3031                 proc_fdunlock(p);
3032                 return (EBADF);
3033         }
3034         if (fp->f_type != DTYPE_VNODE) {
3035                 proc_fdunlock(p);
3036                 return(ENOTSUP);
3037         }
3038         fp->f_iocount++;
3039
3040         if (resultfp)
3041                 *resultfp = fp;
3042         if (resultvp)
3043                 *resultvp = (struct vnode *)fp->f_data;
3044         if (vidp)
3045                 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3046         proc_fdunlock(p);
3047
3048         return (0);
3049 }
3050
3051
3052 /*
3053  * fp_getfsock
3054  *
3055  * Description: Get fileproc and socket pointer for a given fd from the
3056  *              per process open file table of the specified process, and if
3057  *              successful, increment the f_iocount
3058  *
3059  * Parameters:  p                               Process in which fd lives
3060  *              fd                              fd to get information for
3061  *              resultfp                        Pointer to result fileproc
3062  *                                              pointer area, or 0 if none
3063  *              results                         Pointer to result socket
3064  *                                              pointer area, or 0 if none
3065  *
3066  * Returns:     EBADF                   The file descriptor is invalid
3067  *              EOPNOTSUPP              The file descriptor is not a socket
3068  *              0                       Success
3069  *
3070  * Implicit returns:
3071  *              *resultfp (modified)            Fileproc pointer
3072  *              *results (modified)             socket pointer
3073  *
3074  * Notes:       EOPNOTSUPP should probably be ENOTSOCK; this function is only
3075  *              ever called from accept1().
3076  */
3077 int
3078 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3079             struct socket **results)
3080 {
3081         struct filedesc *fdp = p->p_fd;
3082         struct fileproc *fp;
3083
3084         proc_fdlock_spin(p);
3085         if (fd < 0 || fd >= fdp->fd_nfiles ||
3086                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3087                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3088                 proc_fdunlock(p);
3089                 return (EBADF);
3090         }
3091         if (fp->f_type != DTYPE_SOCKET) {
3092                 proc_fdunlock(p);
3093                 return(EOPNOTSUPP);
3094         }
3095         fp->f_iocount++;
3096
3097         if (resultfp)
3098                 *resultfp = fp;
3099         if (results)
3100                 *results = (struct socket *)fp->f_data;
3101         proc_fdunlock(p);
3102
3103         return (0);
3104 }
3105
3106
3107 /*
3108  * fp_getfkq
3109  *
3110  * Description: Get fileproc and kqueue pointer for a given fd from the
3111  *              per process open file table of the specified process, and if
3112  *              successful, increment the f_iocount
3113  *
3114  * Parameters:  p                               Process in which fd lives
3115  *              fd                              fd to get information for
3116  *              resultfp                        Pointer to result fileproc
3117  *                                              pointer area, or 0 if none
3118  *              resultkq                        Pointer to result kqueue
3119  *                                              pointer area, or 0 if none
3120  *
3121  * Returns:     EBADF                   The file descriptor is invalid
3122  *              EBADF                   The file descriptor is not a socket
3123  *              0                       Success
3124  *
3125  * Implicit returns:
3126  *              *resultfp (modified)            Fileproc pointer
3127  *              *resultkq (modified)            kqueue pointer
3128  *
3129  * Notes:       The second EBADF should probably be something else to make
3130  *              the error condition distinct.
3131  */
3132 int
3133 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3134           struct kqueue **resultkq)
3135 {
3136         struct filedesc *fdp = p->p_fd;
3137         struct fileproc *fp;
3138
3139         proc_fdlock_spin(p);
3140         if ( fd < 0 || fd >= fdp->fd_nfiles ||
3141                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3142                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3143                 proc_fdunlock(p);
3144                 return (EBADF);
3145         }
3146         if (fp->f_type != DTYPE_KQUEUE) {
3147                 proc_fdunlock(p);
3148                 return(EBADF);
3149         }
3150         fp->f_iocount++;
3151
3152         if (resultfp)
3153                 *resultfp = fp;
3154         if (resultkq)
3155                 *resultkq = (struct kqueue *)fp->f_data;
3156         proc_fdunlock(p);
3157
3158         return (0);
3159 }
3160
3161
3162 /*
3163  * fp_getfpshm
3164  *
3165  * Description: Get fileproc and POSIX shared memory pointer for a given fd
3166  *              from the per process open file table of the specified process
3167  *              and if successful, increment the f_iocount
3168  *
3169  * Parameters:  p                               Process in which fd lives
3170  *              fd                              fd to get information for
3171  *              resultfp                        Pointer to result fileproc
3172  *                                              pointer area, or 0 if none
3173  *              resultpshm                      Pointer to result POSIX
3174  *                                              shared memory pointer
3175  *                                              pointer area, or 0 if none
3176  *
3177  * Returns:     EBADF                   The file descriptor is invalid
3178  *              EBADF                   The file descriptor is not a POSIX
3179  *                                      shared memory area
3180  *              0                       Success
3181  *
3182  * Implicit returns:
3183  *              *resultfp (modified)            Fileproc pointer
3184  *              *resultpshm (modified)          POSIX shared memory pointer
3185  *
3186  * Notes:       The second EBADF should probably be something else to make
3187  *              the error condition distinct.
3188  */
3189 int
3190 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3191             struct pshmnode **resultpshm)
3192 {
3193         struct filedesc *fdp = p->p_fd;
3194         struct fileproc *fp;
3195
3196         proc_fdlock_spin(p);
3197         if (fd < 0 || fd >= fdp->fd_nfiles ||
3198                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3199                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3200                 proc_fdunlock(p);
3201                 return (EBADF);
3202         }
3203         if (fp->f_type != DTYPE_PSXSHM) {
3204
3205                 proc_fdunlock(p);
3206                 return(EBADF);
3207         }
3208         fp->f_iocount++;
3209
3210         if (resultfp)
3211                 *resultfp = fp;
3212         if (resultpshm)
3213                 *resultpshm = (struct pshmnode *)fp->f_data;
3214         proc_fdunlock(p);
3215
3216         return (0);
3217 }
3218
3219
3220 /*
3221  * fp_getfsem
3222  *
3223  * Description: Get fileproc and POSIX semaphore pointer for a given fd from
3224  *              the per process open file table of the specified process
3225  *              and if successful, increment the f_iocount
3226  *
3227  * Parameters:  p                               Process in which fd lives
3228  *              fd                              fd to get information for
3229  *              resultfp                        Pointer to result fileproc
3230  *                                              pointer area, or 0 if none
3231  *              resultpsem                      Pointer to result POSIX
3232  *                                              semaphore pointer area, or
3233  *                                              0 if none
3234  *
3235  * Returns:     EBADF                   The file descriptor is invalid
3236  *              EBADF                   The file descriptor is not a POSIX
3237  *                                      semaphore
3238  *              0                       Success
3239  *
3240  * Implicit returns:
3241  *              *resultfp (modified)            Fileproc pointer
3242  *              *resultpsem (modified)          POSIX semaphore pointer
3243  *
3244  * Notes:       The second EBADF should probably be something else to make
3245  *              the error condition distinct.
3246  *
3247  *              In order to support unnamed POSIX semaphores, the named
3248  *              POSIX semaphores will have to move out of the per-process
3249  *              open filetable, and into a global table that is shared with
3250  *              unnamed POSIX semaphores, since unnamed POSIX semaphores
3251  *              are typically used by declaring instances in shared memory,
3252  *              and there's no other way to do this without changing the
3253  *              underlying type, which would introduce binary compatibility
3254  *              issues.
3255  */
3256 int
3257 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3258             struct psemnode **resultpsem)
3259 {
3260         struct filedesc *fdp = p->p_fd;
3261         struct fileproc *fp;
3262
3263         proc_fdlock_spin(p);
3264         if (fd < 0 || fd >= fdp->fd_nfiles ||
3265                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3266                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3267                 proc_fdunlock(p);
3268                 return (EBADF);
3269         }
3270         if (fp->f_type != DTYPE_PSXSEM) {
3271                 proc_fdunlock(p);
3272                 return(EBADF);
3273         }
3274         fp->f_iocount++;
3275
3276         if (resultfp)
3277                 *resultfp = fp;
3278         if (resultpsem)
3279                 *resultpsem = (struct psemnode *)fp->f_data;
3280         proc_fdunlock(p);
3281
3282         return (0);
3283 }
3284
3285
3286 /*
3287  * fp_getfpipe
3288  *
3289  * Description: Get fileproc and pipe pointer for a given fd from the
3290  *              per process open file table of the specified process
3291  *              and if successful, increment the f_iocount
3292  *
3293  * Parameters:  p                               Process in which fd lives
3294  *              fd                              fd to get information for
3295  *              resultfp                        Pointer to result fileproc
3296  *                                              pointer area, or 0 if none
3297  *              resultpipe                      Pointer to result pipe
3298  *                                              pointer area, or 0 if none
3299  *
3300  * Returns:     EBADF                   The file descriptor is invalid
3301  *              EBADF                   The file descriptor is not a socket
3302  *              0                       Success
3303  *
3304  * Implicit returns:
3305  *              *resultfp (modified)            Fileproc pointer
3306  *              *resultpipe (modified)          pipe pointer
3307  *
3308  * Notes:       The second EBADF should probably be something else to make
3309  *              the error condition distinct.
3310  */
3311 int
3312 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
3313             struct pipe **resultpipe)
3314 {
3315         struct filedesc *fdp = p->p_fd;
3316         struct fileproc *fp;
3317
3318         proc_fdlock_spin(p);
3319         if (fd < 0 || fd >= fdp->fd_nfiles ||
3320                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3321                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3322                 proc_fdunlock(p);
3323                 return (EBADF);
3324         }
3325         if (fp->f_type != DTYPE_PIPE) {
3326                 proc_fdunlock(p);
3327                 return(EBADF);
3328         }
3329         fp->f_iocount++;
3330
3331         if (resultfp)
3332                 *resultfp = fp;
3333         if (resultpipe)
3334                 *resultpipe = (struct pipe *)fp->f_data;
3335         proc_fdunlock(p);
3336
3337         return (0);
3338 }
3339
#if NETAT
#define DTYPE_ATALK -1          /* XXX This does not belong here */


/*
 * fp_getfatalk
 *
 * Description: Look up fd in the per process open file table of p and,
 *              if its fileproc type matches the AppleTalk check below,
 *              take an I/O reference (f_iocount) on the fileproc and
 *              return the fileproc and/or atalk pointer.
 *
 * Parameters:  p                               Process in which fd lives
 *              fd                              fd to get information for
 *              resultfp                        Where to store the fileproc
 *                                              pointer, or NULL if unwanted
 *              resultatalk                     Where to store the atalk
 *                                              pointer, or NULL if unwanted
 *
 * Returns:     0                       Success
 *              EBADF                   fd is out of range, has no fileproc,
 *                                      or is marked UF_RESERVED
 *              EBADF                   The fileproc type does not match
 *
 * Implicit returns:
 *              *resultfp (modified)            Fileproc pointer
 *              *resultatalk (modified)         atalk pointer (fp->f_data)
 *
 * Locks:       Takes and drops the proc_fdlock internally.
 *
 * Notes:       The second EBADF should probably be something else to make
 *              the error condition distinct.
 *
 *              The type test compares f_type against (DTYPE_ATALK+1),
 *              which evaluates to 0 with the definition above.
 *
 *              This code is specific to AppleTalk protocol support and is
 *              only compiled when NETAT is set.
 */
int
fp_getfatalk(proc_t p, int fd, struct fileproc **resultfp,
	     struct atalk **resultatalk)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc *fp;
	int error = EBADF;	/* every failure below reports EBADF */

	proc_fdlock_spin(p);

	if (fd < 0 || fd >= fdp->fd_nfiles)
		goto out;

	fp = fdp->fd_ofiles[fd];
	if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED))
		goto out;

	if (fp->f_type != (DTYPE_ATALK+1))
		goto out;

	/* Take the I/O reference while the fdlock is still held. */
	fp->f_iocount++;

	if (resultfp != NULL)
		*resultfp = fp;
	if (resultatalk != NULL)
		*resultatalk = (struct atalk *)fp->f_data;
	error = 0;

out:
	proc_fdunlock(p);
	return (error);
}

#endif /* NETAT */
3401
3402 /*
3403  * fp_lookup
3404  *
3405  * Description: Get fileproc pointer for a given fd from the per process
3406  *              open file table of the specified process and if successful,
3407  *              increment the f_iocount
3408  *
3409  * Parameters:  p                               Process in which fd lives
3410  *              fd                              fd to get information for
3411  *              resultfp                        Pointer to result fileproc
3412  *                                              pointer area, or 0 if none
3413  *              locked                          !0 if the caller holds the
3414  *                                              proc_fdlock, 0 otherwise
3415  *
3416  * Returns:     0                       Success
3417  *              EBADF                   Bad file descriptor
3418  *
3419  * Implicit returns:
3420  *              *resultfp (modified)            Fileproc pointer
3421  *
3422  * Locks:       If the argument 'locked' is non-zero, then the caller is
3423  *              expected to have taken and held the proc_fdlock; if it is
3424  *              zero, than this routine internally takes and drops this lock.
3425  */
3426 int
3427 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
3428 {
3429         struct filedesc *fdp = p->p_fd;
3430         struct fileproc *fp;
3431
3432         if (!locked)
3433                 proc_fdlock_spin(p);
3434         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
3435                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3436                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3437                 if (!locked)
3438                         proc_fdunlock(p);
3439                 return (EBADF);
3440         }
3441         fp->f_iocount++;
3442
3443         if (resultfp)
3444                 *resultfp = fp;
3445         if (!locked)
3446                 proc_fdunlock(p);
3447
3448         return (0);
3449 }
3450
3451
3452 /*
3453  * fp_drop_written
3454  *
3455  * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
3456  *              reference previously taken by calling fp_lookup et. al.
3457  *
3458  * Parameters:  p                               Process in which the fd lives
3459  *              fd                              fd associated with the fileproc
3460  *              fp                              fileproc on which to set the
3461  *                                              flag and drop the reference
3462  *
3463  * Returns:     0                               Success
3464  *      fp_drop:EBADF                           Bad file descriptor
3465  *
3466  * Locks:       This function internally takes and drops the proc_fdlock for
3467  *              the supplied process
3468  *
3469  * Notes:       The fileproc must correspond to the fd in the supplied proc
3470  */
3471 int
3472 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
3473 {
3474         int error;
3475
3476         proc_fdlock_spin(p);
3477
3478         fp->f_flags |= FP_WRITTEN;
3479
3480         error = fp_drop(p, fd, fp, 1);
3481
3482         proc_fdunlock(p);
3483
3484         return (error);
3485 }
3486
3487
3488 /*
3489  * fp_drop_event
3490  *
3491  * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
3492  *              reference previously taken by calling fp_lookup et. al.
3493  *
3494  * Parameters:  p                               Process in which the fd lives
3495  *              fd                              fd associated with the fileproc
3496  *              fp                              fileproc on which to set the
3497  *                                              flag and drop the reference
3498  *
3499  * Returns:     0                               Success
3500  *      fp_drop:EBADF                           Bad file descriptor
3501  *
3502  * Locks:       This function internally takes and drops the proc_fdlock for
3503  *              the supplied process
3504  *
3505  * Notes:       The fileproc must correspond to the fd in the supplied proc
3506  */
3507 int
3508 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
3509 {
3510         int error;
3511
3512         proc_fdlock_spin(p);
3513
3514         fp->f_flags |= FP_WAITEVENT;
3515
3516         error = fp_drop(p, fd, fp, 1);
3517
3518         proc_fdunlock(p);
3519
3520         return (error);
3521 }
3522
3523
3524 /*
3525  * fp_drop
3526  *
3527  * Description: Drop the I/O reference previously taken by calling fp_lookup
3528  *              et. al.
3529  *
3530  * Parameters:  p                               Process in which the fd lives
3531  *              fd                              fd associated with the fileproc
3532  *              fp                              fileproc on which to set the
3533  *                                              flag and drop the reference
3534  *              locked                          flag to internally take and
3535  *                                              drop proc_fdlock if it is not
3536  *                                              already held by the caller
3537  *
3538  * Returns:     0                               Success
3539  *              EBADF                           Bad file descriptor
3540  *
3541  * Locks:       This function internally takes and drops the proc_fdlock for
3542  *              the supplied process if 'locked' is non-zero, and assumes that
3543  *              the caller already holds this lock if 'locked' is non-zero.
3544  *
3545  * Notes:       The fileproc must correspond to the fd in the supplied proc
3546  */
3547 int
3548 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
3549 {
3550         struct filedesc *fdp = p->p_fd;
3551         int     needwakeup = 0;
3552
3553         if (!locked)
3554                 proc_fdlock_spin(p);
3555          if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
3556                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3557                         ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3558                          !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
3559                 if (!locked)
3560                         proc_fdunlock(p);
3561                 return (EBADF);
3562         }
3563         fp->f_iocount--;
3564
3565         if (fp->f_iocount == 0) {
3566                 if (fp->f_flags & FP_SELCONFLICT)
3567                         fp->f_flags &= ~FP_SELCONFLICT;
3568
3569                 if (p->p_fpdrainwait) {
3570                         p->p_fpdrainwait = 0;
3571                         needwakeup = 1;
3572                 }
3573         }
3574         if (!locked)
3575                 proc_fdunlock(p);
3576         if (needwakeup)
3577                 wakeup(&p->p_fpdrainwait);
3578
3579         return (0);
3580 }
3581
3582
3583 /*
3584  * file_vnode
3585  *
3586  * Description: Given an fd, look it up in the current process's per process
3587  *              open file table, and return its internal vnode pointer.
3588  *
3589  * Parameters:  fd                              fd to obtain vnode from
3590  *              vpp                             pointer to vnode return area
3591  *
3592  * Returns:     0                               Success
3593  *              EINVAL                          The fd does not refer to a
3594  *                                              vnode fileproc entry
3595  *      fp_lookup:EBADF                         Bad file descriptor
3596  *
3597  * Implicit returns:
3598  *              *vpp (modified)                 Returned vnode pointer
3599  *
3600  * Locks:       This function internally takes and drops the proc_fdlock for
3601  *              the current process
3602  *
3603  * Notes:       If successful, this function increments the f_iocount on the
3604  *              fd's corresponding fileproc.
3605  *
3606  *              The fileproc referenced is not returned; because of this, care
3607  *              must be taken to not drop the last reference (e.g. by closing
3608  *              the file).  This is inherently unsafe, since the reference may
3609  *              not be recoverable from the vnode, if there is a subsequent
3610  *              close that destroys the associate fileproc.  The caller should
3611  *              therefore retain their own reference on the fileproc so that
3612  *              the f_iocount can be dropped subsequently.  Failure to do this
3613  *              can result in the returned pointer immediately becoming invalid
3614  *              following the call.
3615  *
3616  *              Use of this function is discouraged.
3617  */
3618 int
3619 file_vnode(int fd, struct vnode **vpp)
3620 {
3621         proc_t p = current_proc();
3622         struct fileproc *fp;
3623         int error;
3624
3625         proc_fdlock_spin(p);
3626         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3627                 proc_fdunlock(p);
3628                 return(error);
3629         }
3630         if (fp->f_type != DTYPE_VNODE) {
3631                 fp_drop(p, fd, fp,1);
3632                 proc_fdunlock(p);
3633                 return(EINVAL);
3634         }
3635         if (vpp != NULL)
3636                 *vpp = (struct vnode *)fp->f_data;
3637         proc_fdunlock(p);
3638
3639         return(0);
3640 }
3641
3642
3643 /*
3644  * file_vnode_withvid
3645  *
3646  * Description: Given an fd, look it up in the current process's per process
3647  *              open file table, and return its internal vnode pointer.
3648  *
3649  * Parameters:  fd                              fd to obtain vnode from
3650  *              vpp                             pointer to vnode return area
3651  *              vidp                            pointer to vid of the returned vnode
3652  *
3653  * Returns:     0                               Success
3654  *              EINVAL                          The fd does not refer to a
3655  *                                              vnode fileproc entry
3656  *      fp_lookup:EBADF                         Bad file descriptor
3657  *
3658  * Implicit returns:
3659  *              *vpp (modified)                 Returned vnode pointer
3660  *
3661  * Locks:       This function internally takes and drops the proc_fdlock for
3662  *              the current process
3663  *
3664  * Notes:       If successful, this function increments the f_iocount on the
3665  *              fd's corresponding fileproc.
3666  *
3667  *              The fileproc referenced is not returned; because of this, care
3668  *              must be taken to not drop the last reference (e.g. by closing
3669  *              the file).  This is inherently unsafe, since the reference may
3670  *              not be recoverable from the vnode, if there is a subsequent
3671  *              close that destroys the associate fileproc.  The caller should
3672  *              therefore retain their own reference on the fileproc so that
3673  *              the f_iocount can be dropped subsequently.  Failure to do this
3674  *              can result in the returned pointer immediately becoming invalid
3675  *              following the call.
3676  *
3677  *              Use of this function is discouraged.
3678  */
3679 int
3680 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
3681 {
3682         proc_t p = current_proc();
3683         struct fileproc *fp;
3684         vnode_t vp;
3685         int error;
3686
3687         proc_fdlock_spin(p);
3688         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3689                 proc_fdunlock(p);
3690                 return(error);
3691         }
3692         if (fp->f_type != DTYPE_VNODE) {
3693                 fp_drop(p, fd, fp,1);
3694                 proc_fdunlock(p);
3695                 return(EINVAL);
3696         }
3697         vp = (struct vnode *)fp->f_data;
3698         if (vpp != NULL)
3699                 *vpp = vp;
3700
3701         if ((vidp != NULL) && (vp != NULLVP))
3702                 *vidp = (uint32_t)vp->v_id;
3703
3704         proc_fdunlock(p);
3705
3706         return(0);
3707 }
3708
3709
3710 /*
3711  * file_socket
3712  *
3713  * Description: Given an fd, look it up in the current process's per process
3714  *              open file table, and return its internal socket pointer.
3715  *
3716  * Parameters:  fd                              fd to obtain vnode from
3717  *              sp                              pointer to socket return area
3718  *
3719  * Returns:     0                               Success
3720  *              ENOTSOCK                        Not a socket
3721  *              fp_lookup:EBADF                 Bad file descriptor
3722  *
3723  * Implicit returns:
3724  *              *sp (modified)                  Returned socket pointer
3725  *
3726  * Locks:       This function internally takes and drops the proc_fdlock for
3727  *              the current process
3728  *
3729  * Notes:       If successful, this function increments the f_iocount on the
3730  *              fd's corresponding fileproc.
3731  *
3732  *              The fileproc referenced is not returned; because of this, care
3733  *              must be taken to not drop the last reference (e.g. by closing
3734  *              the file).  This is inherently unsafe, since the reference may
3735  *              not be recoverable from the socket, if there is a subsequent
3736  *              close that destroys the associate fileproc.  The caller should
3737  *              therefore retain their own reference on the fileproc so that
3738  *              the f_iocount can be dropped subsequently.  Failure to do this
3739  *              can result in the returned pointer immediately becoming invalid
3740  *              following the call.
3741  *
3742  *              Use of this function is discouraged.
3743  */
3744 int
3745 file_socket(int fd, struct socket **sp)
3746 {
3747         proc_t p = current_proc();
3748         struct fileproc *fp;
3749         int error;
3750
3751         proc_fdlock_spin(p);
3752         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3753                 proc_fdunlock(p);
3754                 return(error);
3755         }
3756         if (fp->f_type != DTYPE_SOCKET) {
3757                 fp_drop(p, fd, fp,1);
3758                 proc_fdunlock(p);
3759                 return(ENOTSOCK);
3760         }
3761         *sp = (struct socket *)fp->f_data;
3762         proc_fdunlock(p);
3763
3764         return(0);
3765 }
3766
3767
3768 /*
3769  * file_flags
3770  *
3771  * Description: Given an fd, look it up in the current process's per process
3772  *              open file table, and return its fileproc's flags field.
3773  *
3774  * Parameters:  fd                              fd whose flags are to be
3775  *                                              retrieved
3776  *              flags                           pointer to flags data area
3777  *
3778  * Returns:     0                               Success
3779  *              ENOTSOCK                        Not a socket
3780  *              fp_lookup:EBADF                 Bad file descriptor
3781  *
3782  * Implicit returns:
3783  *              *flags (modified)               Returned flags field
3784  *
3785  * Locks:       This function internally takes and drops the proc_fdlock for
3786  *              the current process
3787  *
3788  * Notes:       This function will internally increment and decrement the
3789  *              f_iocount of the fileproc as part of its operation.
3790  */
3791 int
3792 file_flags(int fd, int *flags)
3793 {
3794
3795         proc_t p = current_proc();
3796         struct fileproc *fp;
3797         int error;
3798
3799         proc_fdlock_spin(p);
3800         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
3801                 proc_fdunlock(p);
3802                 return(error);
3803         }
3804         *flags = (int)fp->f_flag;
3805         fp_drop(p, fd, fp,1);
3806         proc_fdunlock(p);
3807
3808         return(0);
3809 }
3810
3811
3812 /*
3813  * file_drop
3814  *
3815  * Description: Drop an iocount reference on an fd, and wake up any waiters
3816  *              for draining (i.e. blocked in fileproc_drain() called during
3817  *              the last attempt to close a file).
3818  *
3819  * Parameters:  fd                              fd on which an ioreference is
3820  *                                              to be dropped
3821  *
3822  * Returns:     0                               Success
3823  *              EBADF                           Bad file descriptor
3824  *
3825  * Description: Given an fd, look it up in the current process's per process
3826  *              open file table, and drop it's fileproc's f_iocount by one
3827  *
3828  * Notes:       This is intended as a corresponding operation to the functions
3829  *              file_vnode() and file_socket() operations.
3830  *
3831  *              Technically, the close reference is supposed to be protected
3832  *              by a fileproc_drain(), however, a drain will only block if
3833  *              the fd refers to a character device, and that device has had
3834  *              preparefileread() called on it.  If it refers to something
3835  *              other than a character device, then the drain will occur and
3836  *              block each close attempt, rather than merely the last close.
3837  *
3838  *              Since it's possible for an fd that refers to a character
3839  *              device to have an intermediate close followed by an open to
3840  *              cause a different file to correspond to that descriptor,
3841  *              unless there was a cautionary reference taken on the fileproc,
3842  *              this is an inherently unsafe function.  This happens in the
3843  *              case where multiple fd's in a process refer to the same
3844  *              character device (e.g. stdin/out/err pointing to a tty, etc.).
3845  *
3846  *              Use of this function is discouraged.
3847  */
3848 int
3849 file_drop(int fd)
3850 {
3851         struct fileproc *fp;
3852         proc_t p = current_proc();
3853         int     needwakeup = 0;
3854
3855         proc_fdlock_spin(p);
3856         if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
3857                         (fp = p->p_fd->fd_ofiles[fd]) == NULL ||
3858                         ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
3859                          !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
3860                 proc_fdunlock(p);
3861                 return (EBADF);
3862         }
3863         fp->f_iocount --;
3864
3865         if (fp->f_iocount == 0) {
3866                 if (fp->f_flags & FP_SELCONFLICT)
3867                         fp->f_flags &= ~FP_SELCONFLICT;
3868
3869                 if (p->p_fpdrainwait) {
3870                         p->p_fpdrainwait = 0;
3871                         needwakeup = 1;
3872                 }
3873         }
3874         proc_fdunlock(p);
3875
3876         if (needwakeup)
3877                 wakeup(&p->p_fpdrainwait);
3878         return(0);
3879 }
3880
3881
3882 /*
3883  * falloc
3884  *
3885  * Description: Allocate an entry in the per process open file table and
3886  *              return the corresponding fileproc and fd.
3887  *
3888  * Parameters:  p                               The process in whose open file
3889  *                                              table the fd is to be allocated
3890  *              resultfp                        Pointer to fileproc pointer
3891  *                                              return area
3892  *              resultfd                        Pointer to fd return area
3893  *              ctx                             VFS context
3894  *
3895  * Returns:     0                               Success
3896  *      falloc:ENFILE                           Too many open files in system
3897  *      falloc:EMFILE                           Too many open files in process
3898  *      falloc:ENOMEM                           M_FILEPROC or M_FILEGLOB zone
3899  *                                              exhausted
3900  *
3901  * Implicit returns:
3902  *              *resultfd (modified)            Returned fileproc pointer
3903  *              *resultfd (modified)            Returned fd
3904  *
3905  * Locks:       This function takes and drops the proc_fdlock; if this lock
3906  *              is already held, use falloc_locked() instead.
3907  *
3908  * Notes:       This function takes separate process and context arguments
3909  *              solely to support kern_exec.c; otherwise, it would take
3910  *              neither, and expect falloc_locked() to use the
3911  *              vfs_context_current() routine internally.
3912  */
3913 int
3914 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
3915 {
3916         int error;
3917
3918         proc_fdlock(p);
3919         error = falloc_locked(p, resultfp, resultfd, ctx, 1);
3920         proc_fdunlock(p);
3921
3922         return(error);
3923 }
3924
3925
3926 /*
3927  * falloc_locked
3928  *
3929  * Create a new open file structure and allocate
3930  * a file descriptor for the process that refers to it.
3931  *
3932  * Returns:     0                       Success
3933  *
3934  * Description: Allocate an entry in the per process open file table and
3935  *              return the corresponding fileproc and fd.
3936  *
3937  * Parameters:  p                               The process in whose open file
3938  *                                              table the fd is to be allocated
3939  *              resultfp                        Pointer to fileproc pointer
3940  *                                              return area
3941  *              resultfd                        Pointer to fd return area
3942  *              ctx                             VFS context
3943  *              locked                          Flag to indicate whether the
3944  *                                              caller holds proc_fdlock
3945  *
3946  * Returns:     0                               Success
3947  *              ENFILE                          Too many open files in system
3948  *              fdalloc:EMFILE                  Too many open files in process
3949  *              ENOMEM                          M_FILEPROC or M_FILEGLOB zone
3950  *                                              exhausted
3951  *      fdalloc:ENOMEM
3952  *
3953  * Implicit returns:
3954  *              *resultfd (modified)            Returned fileproc pointer
3955  *              *resultfd (modified)            Returned fd
3956  *
3957  * Locks:       If the parameter 'locked' is zero, this function takes and
3958  *              drops the proc_fdlock; if non-zero, the caller must hold the
3959  *              lock.
3960  *
3961  * Notes:       If you intend to use a non-zero 'locked' parameter, use the
3962  *              utility function falloc() instead.
3963  *
3964  *              This function takes separate process and context arguments
3965  *              solely to support kern_exec.c; otherwise, it would take
3966  *              neither, and use the vfs_context_current() routine internally.
3967  */
3968 int
3969 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
3970               vfs_context_t ctx, int locked)
3971 {
3972         struct fileproc *fp;
3973         struct fileglob *fg;
3974         int error, nfd;
3975
3976         if (!locked)
3977                 proc_fdlock(p);
3978         if ( (error = fdalloc(p, 0, &nfd)) ) {
3979                 if (!locked)
3980                         proc_fdunlock(p);
3981                 return (error);
3982         }
3983         if (nfiles >= maxfiles) {
3984                 if (!locked)
3985                         proc_fdunlock(p);
3986                 tablefull("file");
3987                 return (ENFILE);
3988         }
3989 #if CONFIG_MACF
3990         error = mac_file_check_create(proc_ucred(p));
3991         if (error) {
3992                 if (!locked)
3993                         proc_fdunlock(p);
3994                 return (error);
3995         }
3996 #endif
3997
3998         /*
3999          * Allocate a new file descriptor.
4000          * If the process has file descriptor zero open, add to the list
4001          * of open files at that point, otherwise put it at the front of
4002          * the list of open files.
4003          */
4004         proc_fdunlock(p);
4005
4006         MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
4007         if (fp == NULL) {
4008                 if (locked)
4009                         proc_fdlock(p);
4010                 return (ENOMEM);
4011         }
4012         MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4013         if (fg == NULL) {
4014                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4015                 if (locked)
4016                         proc_fdlock(p);
4017                 return (ENOMEM);
4018         }
4019         bzero(fp, sizeof(struct fileproc));
4020         bzero(fg, sizeof(struct fileglob));
4021         lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4022
4023         fp->f_iocount = 1;
4024         fg->fg_count = 1;
4025         fp->f_fglob = fg;
4026 #if CONFIG_MACF
4027         mac_file_label_init(fg);
4028 #endif
4029
4030         kauth_cred_ref(ctx->vc_ucred);
4031
4032         proc_fdlock(p);
4033
4034         fp->f_cred = ctx->vc_ucred;
4035
4036 #if CONFIG_MACF
4037         mac_file_label_associate(fp->f_cred, fg);
4038 #endif
4039
4040         OSAddAtomic(1, &nfiles);
4041
4042         p->p_fd->fd_ofiles[nfd] = fp;
4043
4044         if (!locked)
4045                 proc_fdunlock(p);
4046
4047         if (resultfp)
4048                 *resultfp = fp;
4049         if (resultfd)
4050                 *resultfd = nfd;
4051
4052         return (0);
4053 }
4054
4055
4056 /*
4057  * fg_free
4058  *
4059  * Description: Free a file structure; drop the global open file count, and
4060  *              drop the credential reference, if the fileglob has one, and
4061  *              destroy the instance mutex before freeing
4062  *
4063  * Parameters:  fg                              Pointer to fileglob to be
4064  *                                              freed
4065  *
4066  * Returns:     void
4067  */
4068 void
4069 fg_free(struct fileglob *fg)
4070 {
4071         OSAddAtomic(-1, &nfiles);
4072
4073         if (IS_VALID_CRED(fg->fg_cred)) {
4074                 kauth_cred_unref(&fg->fg_cred);
4075         }
4076         lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4077
4078 #if CONFIG_MACF
4079         mac_file_label_destroy(fg);
4080 #endif
4081         FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4082 }
4083
4084
4085 /*
4086  * fdexec
4087  *
4088  * Description: Perform close-on-exec processing for all files in a process
4089  *              that are either marked as close-on-exec, or which were in the
4090  *              process of being opened at the time of the execve
4091  *
4092  *              Also handles the case (via posix_spawn()) where -all-
 *              files except those marked with "inherit" are treated as
4094  *              close-on-exec.
4095  *
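 * Parameters:  p                               Pointer to process calling
 *                                              execve
 *              flags                           posix_spawn() flags; when
 *                                              POSIX_SPAWN_CLOEXEC_DEFAULT
 *                                              is set, every descriptor not
 *                                              marked "inherit" is closed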
4098  *
4099  * Returns:     void
4100  *
4101  * Locks:       This function internally takes and drops proc_fdlock()
4102  *
4103  * Notes:       This function drops and retakes the kernel funnel; this is
4104  *              inherently unsafe, since another thread may have the
4105  *              proc_fdlock.
4106  *
4107  * XXX:         We should likely reverse the lock and funnel drop/acquire
4108  *              order to avoid the small race window; it's also possible that
4109  *              if the program doing the exec has an outstanding listen socket
4110  *              and a network connection is completed asynchronously that we
4111  *              will end up with a "ghost" socket reference in the new process.
4112  *
4113  *              This needs reworking to make it safe to remove the funnel from
4114  *              the execve and posix_spawn system calls.
4115  */
4116 void
4117 fdexec(proc_t p, short flags)
4118 {
4119         struct filedesc *fdp = p->p_fd;
4120         int i;
4121         boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4122
4123         proc_fdlock(p);
4124         for (i = fdp->fd_lastfile; i >= 0; i--) {
4125
4126                 struct fileproc *fp = fdp->fd_ofiles[i];
4127                 char *flagp = &fdp->fd_ofileflags[i];
4128
4129                 if (fp && cloexec_default) {
4130                         /*
4131                          * Reverse the usual semantics of file descriptor
4132                          * inheritance - all of them should be closed
4133                          * except files marked explicitly as "inherit" and
4134                          * not marked close-on-exec.
4135                          */
4136                         if ((*flagp & (UF_EXCLOSE|UF_INHERIT)) != UF_INHERIT)
4137                                 *flagp |= UF_EXCLOSE;
4138                         *flagp &= ~UF_INHERIT;
4139                 }
4140
4141                 if (
4142                     ((*flagp & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
4143 #if CONFIG_MACF
4144                     || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4145 #endif
4146                 ) {
4147                         if (i < fdp->fd_knlistsize)
4148                                 knote_fdclose(p, i);
4149                         procfdtbl_clearfd(p, i);
4150                         if (i == fdp->fd_lastfile && i > 0)
4151                                 fdp->fd_lastfile--;
4152                         if (i < fdp->fd_freefile)
4153                                 fdp->fd_freefile = i;
4154
4155                         /*
4156                          * Wait for any third party viewers (e.g., lsof)
4157                          * to release their references to this fileproc.
4158                          */
4159                         while (fp->f_iocount > 0) {
4160                                 p->p_fpdrainwait = 1;
4161                                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4162                                     "fpdrain", NULL);
4163                         }
4164
4165                         closef_locked(fp, fp->f_fglob, p);
4166
4167                         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4168                 }
4169         }
4170         proc_fdunlock(p);
4171 }
4172
4173
4174 /*
4175  * fdcopy
4176  *
4177  * Description: Copy a filedesc structure.  This is normally used as part of
4178  *              forkproc() when forking a new process, to copy the per process
4179  *              open file table over to the new process.
4180  *
4181  * Parameters:  p                               Process whose open file table
4182  *                                              is to be copied (parent)
4183  *              uth_cdir                        Per thread current working
 *                                              directory, or NULL
4185  *
4186  * Returns:     NULL                            Copy failed
4187  *              !NULL                           Pointer to new struct filedesc
4188  *
4189  * Locks:       This function internally takes and drops proc_fdlock()
4190  *
4191  * Notes:       Files are copied directly, ignoring the new resource limits
4192  *              for the process that's being copied into.  Since the descriptor
4193  *              references are just additional references, this does not count
4194  *              against the number of open files on the system.
4195  *
4196  *              The struct filedesc includes the current working directory,
4197  *              and the current root directory, if the process is chroot'ed.
4198  *
4199  *              If the exec was called by a thread using a per thread current
4200  *              working directory, we inherit the working directory from the
4201  *              thread making the call, rather than from the process.
4202  *
4203  *              In the case of a failure to obtain a reference, for most cases,
4204  *              the file entry will be silently dropped.  There's an exception
 *              for the case of a chroot dir, since a failure to obtain a
4206  *              reference there would constitute an "escape" from the chroot
4207  *              environment, which must not be allowed.  In that case, we will
4208  *              deny the execve() operation, rather than allowing the escape.
4209  */
4210 struct filedesc *
4211 fdcopy(proc_t p, vnode_t uth_cdir)
4212 {
4213         struct filedesc *newfdp, *fdp = p->p_fd;
4214         int i;
4215         struct fileproc *ofp, *fp;
4216         vnode_t v_dir;
4217
4218         MALLOC_ZONE(newfdp, struct filedesc *,
4219                         sizeof(*newfdp), M_FILEDESC, M_WAITOK);
4220         if (newfdp == NULL)
4221                 return(NULL);
4222
4223         proc_fdlock(p);
4224
4225         /*
4226          * the FD_CHROOT flag will be inherited via this copy
4227          */
4228         (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4229
4230         /*
4231          * If we are running with per-thread current working directories,
4232          * inherit the new current working directory from the current thread
4233          * instead, before we take our references.
4234          */
4235         if (uth_cdir != NULLVP)
4236                 newfdp->fd_cdir = uth_cdir;
4237
4238         /*
4239          * For both fd_cdir and fd_rdir make sure we get
4240          * a valid reference... if we can't, than set
4241          * set the pointer(s) to NULL in the child... this
4242          * will keep us from using a non-referenced vp
4243          * and allows us to do the vnode_rele only on
4244          * a properly referenced vp
4245          */
4246         if ( (v_dir = newfdp->fd_cdir) ) {
4247                 if (vnode_getwithref(v_dir) == 0) {
4248                         if ( (vnode_ref(v_dir)) )
4249                                 newfdp->fd_cdir = NULL;
4250                         vnode_put(v_dir);
4251                 } else
4252                         newfdp->fd_cdir = NULL;
4253         }
4254         if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4255                 /*
4256                  * we couldn't get a new reference on
4257                  * the current working directory being
4258                  * inherited... we might as well drop
4259                  * our reference from the parent also
4260                  * since the vnode has gone DEAD making
4261                  * it useless... by dropping it we'll
4262                  * be that much closer to recycling it
4263                  */
4264                 vnode_rele(fdp->fd_cdir);
4265                 fdp->fd_cdir = NULL;
4266         }
4267
4268         if ( (v_dir = newfdp->fd_rdir) ) {
4269                 if (vnode_getwithref(v_dir) == 0) {
4270                         if ( (vnode_ref(v_dir)) )
4271                                 newfdp->fd_rdir = NULL;
4272                         vnode_put(v_dir);
4273                 } else {
4274                         newfdp->fd_rdir = NULL;
4275                 }
4276         }
4277         /* Coming from a chroot environment and unable to get a reference... */
4278         if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4279                 /*
4280                  * We couldn't get a new reference on
4281                  * the chroot directory being
4282                  * inherited... this is fatal, since
4283                  * otherwise it would constitute an
4284                  * escape from a chroot environment by
4285                  * the new process.
4286                  */
4287                 if (newfdp->fd_cdir)
4288                         vnode_rele(newfdp->fd_cdir);
4289                 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
4290                 return(NULL);
4291         }
4292         newfdp->fd_refcnt = 1;
4293
4294         /*
4295          * If the number of open files fits in the internal arrays
4296          * of the open file structure, use them, otherwise allocate
4297          * additional memory for the number of descriptors currently
4298          * in use.
4299          */
4300         if (newfdp->fd_lastfile < NDFILE)
4301                 i = NDFILE;
4302         else {
4303                 /*
4304                  * Compute the smallest multiple of NDEXTENT needed
4305                  * for the file descriptors currently in use,
4306                  * allowing the table to shrink.
4307                  */
4308                 i = newfdp->fd_nfiles;
4309                 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
4310                         i /= 2;
4311         }
4312         proc_fdunlock(p);
4313
4314         MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
4315                                 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4316         if (newfdp->fd_ofiles == NULL) {
4317                 if (newfdp->fd_cdir)
4318                         vnode_rele(newfdp->fd_cdir);
4319                 if (newfdp->fd_rdir)
4320                         vnode_rele(newfdp->fd_rdir);
4321
4322                 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
4323                 return(NULL);
4324         }
4325         (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4326         proc_fdlock(p);
4327
4328         newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4329         newfdp->fd_nfiles = i;
4330
4331         if (fdp->fd_nfiles > 0) {
4332                 struct fileproc **fpp;
4333                 char *flags;
4334
4335                 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4336                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4337                 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4338                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4339
4340                 /*
4341                  * kq descriptors cannot be copied.
4342                  */
4343                 if (newfdp->fd_knlistsize != -1) {
4344                         fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4345                         for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
4346                                 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
4347                                         *fpp = NULL;
4348                                         newfdp->fd_ofileflags[i] = 0;
4349                                         if (i < newfdp->fd_freefile)
4350                                                 newfdp->fd_freefile = i;
4351                                 }
4352                                 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
4353                                         newfdp->fd_lastfile--;
4354                         }
4355                         newfdp->fd_knlist = NULL;
4356                         newfdp->fd_knlistsize = -1;
4357                         newfdp->fd_knhash = NULL;
4358                         newfdp->fd_knhashmask = 0;
4359                 }
4360                 fpp = newfdp->fd_ofiles;
4361                 flags = newfdp->fd_ofileflags;
4362
4363                 for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++)
4364                         if ((ofp = *fpp) != NULL && !(*flags & UF_RESERVED)) {
4365                                 MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK);
4366                                 if (fp == NULL) {
4367                                         /*
4368                                          * XXX no room to copy, unable to
4369                                          * XXX safely unwind state at present
4370                                          */
4371                                         *fpp = NULL;
4372                                 } else {
4373                                         bzero(fp, sizeof(struct fileproc));
4374                                         fp->f_flags = ofp->f_flags;
4375                                         //fp->f_iocount = ofp->f_iocount;
4376                                         fp->f_iocount = 0;
4377                                         fp->f_fglob = ofp->f_fglob;
4378                                         (void)fg_ref(fp);
4379                                         *fpp = fp;
4380                                 }
4381                         } else {
4382                                 if (i < newfdp->fd_freefile)
4383                                         newfdp->fd_freefile = i;
4384                                 *fpp = NULL;
4385                                 *flags = 0;
4386                         }
4387         }
4388
4389         proc_fdunlock(p);
4390         return (newfdp);
4391 }
4392
4393
4394 /*
4395  * fdfree
4396  *
4397  * Description: Release a filedesc (per process open file table) structure;
4398  *              this is done on process exit(), or from forkproc_free() if
4399  *              the fork fails for some reason subsequent to a successful
4400  *              call to fdcopy()
4401  *
4402  * Parameters:  p                               Pointer to process going away
4403  *
4404  * Returns:     void
4405  *
4406  * Locks:       This function internally takes and drops proc_fdlock()
4407  */
4408 void
4409 fdfree(proc_t p)
4410 {
4411         struct filedesc *fdp;
4412         struct fileproc *fp;
4413         int i;
4414
4415         proc_fdlock(p);
4416
4417         /* Certain daemons might not have file descriptors */
4418         fdp = p->p_fd;
4419
4420         if ((fdp == NULL) || (--fdp->fd_refcnt > 0)) {
4421                 proc_fdunlock(p);
4422                 return;
4423         }
4424         if (fdp->fd_refcnt == 0xffff)
4425                 panic("fdfree: bad fd_refcnt");
4426
4427         /* Last reference: the structure can't change out from under us */
4428
4429         if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
4430                 for (i = fdp->fd_lastfile; i >= 0; i--) {
4431                         if ((fp = fdp->fd_ofiles[i]) != NULL) {
4432
4433                           if (fdp->fd_ofileflags[i] & UF_RESERVED)
4434                                 panic("fdfree: found fp with UF_RESERVED");
4435
4436                                 /* closef drops the iocount ... */
4437                                 if ((fp->f_flags & FP_INCHRREAD) != 0)
4438                                         fp->f_iocount++;
4439                                 procfdtbl_reservefd(p, i);
4440
4441                                 if (i < fdp->fd_knlistsize)
4442                                         knote_fdclose(p, i);
4443                                 if (fp->f_flags & FP_WAITEVENT)
4444                                         (void)waitevent_close(p, fp);
4445                                 (void) closef_locked(fp, fp->f_fglob, p);
4446                                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4447                         }
4448                 }
4449                 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
4450                 fdp->fd_ofiles = NULL;
4451                 fdp->fd_nfiles = 0;
4452         }
4453
4454         proc_fdunlock(p);
4455
4456         if (fdp->fd_cdir)
4457                 vnode_rele(fdp->fd_cdir);
4458         if (fdp->fd_rdir)
4459                 vnode_rele(fdp->fd_rdir);
4460
4461         proc_fdlock_spin(p);
4462         p->p_fd = NULL;
4463         proc_fdunlock(p);
4464
4465         if (fdp->fd_knlist)
4466                 FREE(fdp->fd_knlist, M_KQUEUE);
4467         if (fdp->fd_knhash)
4468                 FREE(fdp->fd_knhash, M_KQUEUE);
4469
4470         FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
4471 }
4472
4473
4474 /*
4475  * closef_finish
4476  *
4477  * Description: Called on last open instance for a fileglob for a file being
4478  *              closed.
4479  *
4480  * Parameters:  fp                      Pointer to fileproc for fd
4481  *              fg                      Pointer to fileglob for fd
4482  *              p                       Pointer to proc structure
4483  *
4484  * Returns:     0                       Success
4485  *      <fo_close>:???                  Anything returnable by a per-fileops
4486  *                                      close function
4487  *
4488  * Note:        fp can only be non-NULL if p is also non-NULL.  If p is NULL,
 *              then fg must either be locked (FHASLOCK) or must not have a
4490  *              type of DTYPE_VNODE.
4491  *
4492  *              On return, the fg is freed.
4493  *
4494  *              This function may block draining output to a character
4495  *              device on last close of that device.
4496  */
4497 static int
4498 closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx)
4499 {
4500         int error;
4501
4502
4503         /* fg_ops completed initialization? */
4504         if (fg->fg_ops)
4505                 error = fo_close(fg, ctx);
4506         else
4507                 error = 0;
4508
4509         /* if fp is non-NULL, drain it out */
4510         if (((fp != (struct fileproc *)0) && ((fp->f_flags & FP_INCHRREAD) != 0))) {
4511                 proc_fdlock_spin(p);
4512                 if ( ((fp->f_flags & FP_INCHRREAD) != 0) ) {
4513                         fileproc_drain(p, fp);
4514                 }
4515                 proc_fdunlock(p);
4516         }
4517         fg_free(fg);
4518
4519         return (error);
4520 }
4521
4522 /*
4523  * closef_locked
4524  *
4525  * Description: Internal form of closef; called with proc_fdlock held
4526  *
4527  * Parameters:  fp                      Pointer to fileproc for fd
4528  *              fg                      Pointer to fileglob for fd
4529  *              p                       Pointer to proc structure
4530  *
4531  * Returns:     0                       Success
4532  *      closef_finish:???               Anything returnable by a per-fileops
4533  *                                      close function
4534  *
4535  * Note:        Decrements reference count on file structure; if this was the
4536  *              last reference, then closef_finish() is called
4537  *
4538  *              p and fp are allowed to  be NULL when closing a file that was
4539  *              being passed in a message (but only if we are called when this
4540  *              is NOT the last reference).
4541  */
4542 int
4543 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
4544 {
4545         struct vnode *vp;
4546         struct flock lf;
4547         struct vfs_context context;
4548         int error;
4549
4550         if (fg == NULL) {
4551                 return (0);
4552         }
4553
4554         /* Set up context with cred stashed in fg */
4555         if (p == current_proc())
4556                 context.vc_thread = current_thread();
4557         else
4558                 context.vc_thread = NULL;
4559         context.vc_ucred = fg->fg_cred;
4560
4561         /*
4562          * POSIX record locking dictates that any close releases ALL
4563          * locks owned by this process.  This is handled by setting
4564          * a flag in the unlock to free ONLY locks obeying POSIX
4565          * semantics, and not to free BSD-style file locks.
4566          * If the descriptor was in a message, POSIX-style locks
4567          * aren't passed with the descriptor.
4568          */
4569         if (p && (p->p_ladvflag & P_LADVLOCK) && fg->fg_type == DTYPE_VNODE) {
4570                 proc_fdunlock(p);
4571
4572                 lf.l_whence = SEEK_SET;
4573                 lf.l_start = 0;
4574                 lf.l_len = 0;
4575                 lf.l_type = F_UNLCK;
4576                 vp = (struct vnode *)fg->fg_data;
4577
4578                 if ( (error = vnode_getwithref(vp)) == 0 ) {
4579                         (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context);
4580                         (void)vnode_put(vp);
4581                 }
4582                 proc_fdlock(p);
4583         }
4584         lck_mtx_lock_spin(&fg->fg_lock);
4585         fg->fg_count--;
4586
4587         if (fg->fg_count > 0) {
4588                 lck_mtx_unlock(&fg->fg_lock);
4589                 return (0);
4590         }
4591 #if DIAGNOSTIC
4592         if (fg->fg_count != 0)
4593                 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
4594 #endif
4595
4596         if (fp && (fp->f_flags & FP_WRITTEN))
4597                 fg->fg_flag |= FWASWRITTEN;
4598
4599         fg->fg_lflags |= FG_TERM;
4600         lck_mtx_unlock(&fg->fg_lock);
4601
4602         if (p)
4603                 proc_fdunlock(p);
4604         error = closef_finish(fp, fg, p, &context);
4605         if (p)
4606                 proc_fdlock(p);
4607
4608         return(error);
4609 }
4610
4611
4612 /*
4613  * fileproc_drain
4614  *
4615  * Description: Drain out pending I/O operations
4616  *
4617  * Parameters:  p                               Process closing this file
4618  *              fp                              fileproc struct for the open
4619  *                                              instance on the file
4620  *
4621  * Returns:     void
4622  *
4623  * Locks:       Assumes the caller holds the proc_fdlock
4624  *
4625  * Notes:       For character devices, this occurs on the last close of the
4626  *              device; for all other file descriptors, this occurs on each
4627  *              close to prevent fd's from being closed out from under
4628  *              operations currently in progress and blocked
4629  *
4630  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
4631  *              regarding their use and interaction with this function.
4632  */
4633 void
4634 fileproc_drain(proc_t p, struct fileproc * fp)
4635 {
4636         struct vfs_context context;
4637
4638         context.vc_thread = proc_thread(p);     /* XXX */
4639         context.vc_ucred = fp->f_fglob->fg_cred;
4640
4641         fp->f_iocount-- ; /* (the one the close holds) */
4642
4643         while (fp->f_iocount) {
4644
4645                 lck_mtx_convert_spin(&p->p_fdmlock);
4646
4647                 if (fp->f_fglob->fg_ops->fo_drain) {
4648                         (*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
4649                 }
4650                 if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
4651                         if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
4652                                 panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr);
4653                 }
4654                 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
4655                         if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
4656                                 panic("bad select_conflict_queue");
4657                 }
4658                 p->p_fpdrainwait = 1;
4659
4660                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
4661
4662         }
4663 #if DIAGNOSTIC
4664         if ((fp->f_flags & FP_INSELECT) != 0)
4665                 panic("FP_INSELECT set on drained fp");
4666 #endif
4667         if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
4668                 fp->f_flags &= ~FP_SELCONFLICT;
4669 }
4670
4671
4672 /*
4673  * fp_free
4674  *
4675  * Description: Release the fd and free the fileproc associated with the fd
4676  *              in the per process open file table of the specified process;
4677  *              these values must correspond.
4678  *
4679  * Parameters:  p                               Process containing fd
4680  *              fd                              fd to be released
4681  *              fp                              fileproc to be freed
4682  *
4683  * Returns:     0                               Success
4684  *
4685  * Notes:       XXX function should be void - no one interprets the returns
4686  *              XXX code
4687  */
4688 int
4689 fp_free(proc_t p, int fd, struct fileproc * fp)
4690 {
4691         proc_fdlock_spin(p);
4692         fdrelse(p, fd);
4693         proc_fdunlock(p);
4694
4695         fg_free(fp->f_fglob);
4696         FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4697         return(0);
4698 }
4699
4700
4701 /*
4702  * flock
4703  *
4704  * Description: Apply an advisory lock on a file descriptor.
4705  *
4706  * Parameters:  p                               Process making request
4707  *              uap->fd                         fd on which the lock is to be
4708  *                                              attempted
4709  *              uap->how                        (Un)Lock bits, including type
4710  *              retval                          Pointer to the call return area
4711  *
4712  * Returns:     0                               Success
4713  *      fp_getfvp:EBADF                         Bad file descriptor
4714  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
4715  *      vnode_getwithref:???
4716  *      VNOP_ADVLOCK:???
4717  *
 * Implicit returns:
 *              None; retval is unused by this call
4720  *
4721  * Notes:       Just attempt to get a record lock of the requested type on
4722  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
4723  */
4724 int
4725 flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
4726 {
4727         int fd = uap->fd;
4728         int how = uap->how;
4729         struct fileproc *fp;
4730         struct vnode *vp;
4731         struct flock lf;
4732         vfs_context_t ctx = vfs_context_current();
4733         int error=0;
4734
4735         AUDIT_ARG(fd, uap->fd);
4736         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
4737                 return(error);
4738         }
4739         if ( (error = vnode_getwithref(vp)) ) {
4740                 goto out1;
4741         }
4742         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4743
4744         lf.l_whence = SEEK_SET;
4745         lf.l_start = 0;
4746         lf.l_len = 0;
4747         if (how & LOCK_UN) {
4748                 lf.l_type = F_UNLCK;
4749                 fp->f_flag &= ~FHASLOCK;
4750                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx);
4751                 goto out;
4752         }
4753         if (how & LOCK_EX)
4754                 lf.l_type = F_WRLCK;
4755         else if (how & LOCK_SH)
4756                 lf.l_type = F_RDLCK;
4757         else {
4758                 error = EBADF;
4759                 goto out;
4760         }
4761 #if CONFIG_MACF
4762         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
4763         if (error)
4764                 goto out;
4765 #endif
4766         fp->f_flag |= FHASLOCK;
4767         if (how & LOCK_NB) {
4768                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx);
4769                 goto out;
4770         }
4771         error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx);
4772 out:
4773         (void)vnode_put(vp);
4774 out1:
4775         fp_drop(p, fd, fp, 0);
4776         return(error);
4777
4778 }
4779
4780 /*
4781  * fileport_makeport
4782  *
4783  * Description: Obtain a Mach send right for a given file descriptor.
4784  *
4785  * Parameters:  p               Process calling fileport
4786  *              uap->fd         The fd to reference
4787  *              uap->portnamep  User address at which to place port name.
4788  *
4789  * Returns:     0               Success.
4790  *              EBADF           Bad file descriptor.
4791  *              EINVAL          File descriptor had type that cannot be sent, misc. other errors.
4792  *              EFAULT          Address at which to store port name is not valid.
4793  *              EAGAIN          Resource shortage.
4794  *
4795  * Implicit returns:
4796  *              On success, name of send right is stored at user-specified address.
4797  */
4798 int
4799 fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
4800     __unused int *retval)
4801 {
4802         int err;
4803         int fd = uap->fd;
4804         user_addr_t user_portaddr = uap->portnamep;
4805         struct fileproc *fp = FILEPROC_NULL;
4806         struct fileglob *fg = NULL;
4807         ipc_port_t fileport;
4808         mach_port_name_t name = MACH_PORT_NULL;
4809
4810         err = fp_lookup(p, fd, &fp, 0);
4811         if (err != 0) {
4812                 goto out;
4813         }
4814
4815         if (!filetype_issendable(fp->f_type)) {
4816                 err = EINVAL;
4817                 goto out;
4818         }
4819
4820         /* Dropped when port is deallocated */
4821         fg = fp->f_fglob;
4822         fg_ref(fp);
4823
4824         /* Allocate and initialize a port */
4825         fileport = fileport_alloc(fg);
4826         if (fileport == IPC_PORT_NULL) {
4827                 err = EAGAIN;
4828                 fg_drop(fp);
4829                 goto out;
4830         }
4831
4832         /* Add an entry.  Deallocates port on failure. */
4833         name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
4834         if (!MACH_PORT_VALID(name)) {
4835                 err = EINVAL;
4836                 goto out;
4837         }
4838
4839         err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
4840         if (err != 0) {
4841                 goto out;
4842         }
4843
4844         /* Tag the fileglob for debugging purposes */
4845         lck_mtx_lock_spin(&fg->fg_lock);
4846         fg->fg_lflags |= FG_PORTMADE;
4847         lck_mtx_unlock(&fg->fg_lock);
4848
4849         fp_drop(p, fd, fp, 0);
4850
4851         return 0;
4852
4853 out:
4854         if (MACH_PORT_VALID(name)) {
4855                 /* Don't care if another thread races us to deallocate the entry */
4856                 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
4857         }
4858
4859         if (fp != FILEPROC_NULL) {
4860                 fp_drop(p, fd, fp, 0);
4861         }
4862
4863         return err;
4864 }
4865
4866 void
4867 fileport_releasefg(struct fileglob *fg)
4868 {
4869         (void)closef_locked(NULL, fg, PROC_NULL);
4870
4871         return;
4872 }
4873
4874
4875 /*
4876  * fileport_makefd
4877  *
4878  * Description: Obtain the file descriptor for a given Mach send right.
4879  *
4880  * Parameters:  p               Process calling fileport
4881  *              uap->port       Name of send right to file port.
4882  *
4883  * Returns:     0               Success
4884  *              EINVAL          Invalid Mach port name, or port is not for a file.
4885  *      fdalloc:EMFILE
4886  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
4887  *
4888  * Implicit returns:
4889  *              *retval (modified)              The new descriptor
4890  */
4891 int
4892 fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
4893 {
4894         struct fileglob *fg;
4895         struct fileproc *fp = FILEPROC_NULL;
4896         ipc_port_t port = IPC_PORT_NULL;
4897         mach_port_name_t send = uap->port;
4898         kern_return_t res;
4899         int fd;
4900         int err;
4901
4902         res = ipc_object_copyin(get_task_ipcspace(p->task),
4903                         send, MACH_MSG_TYPE_COPY_SEND, &port);
4904
4905         if (res != KERN_SUCCESS) {
4906                 err = EINVAL;
4907                 goto out;
4908         }
4909
4910         fg = fileport_port_to_fileglob(port);
4911         if (fg == NULL) {
4912                 err = EINVAL;
4913                 goto out;
4914         }
4915
4916         MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK);
4917         if (fp == FILEPROC_NULL) {
4918                 err = ENOMEM;
4919                 goto out;
4920         }
4921
4922         bzero(fp, sizeof(*fp));
4923
4924         fp->f_fglob = fg;
4925         fg_ref(fp);
4926
4927         proc_fdlock(p);
4928         err = fdalloc(p, 0, &fd);
4929         if (err != 0) {
4930                 proc_fdunlock(p);
4931                 goto out;
4932         }
4933         *fdflags(p, fd) |= UF_EXCLOSE;
4934
4935         procfdtbl_releasefd(p, fd, fp);
4936         proc_fdunlock(p);
4937
4938         *retval = fd;
4939         err = 0;
4940 out:
4941         if ((fp != NULL) && (0 != err)) {
4942                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
4943         }
4944
4945         if (IPC_PORT_NULL != port) {
4946                 ipc_port_release_send(port);
4947         }
4948
4949         return err;
4950 }
4951
4952
4953 /*
4954  * dupfdopen
4955  *
4956  * Description: Duplicate the specified descriptor to a free descriptor;
4957  *              this is the second half of fdopen(), above.
4958  *
4959  * Parameters:  fdp                             filedesc pointer to fill in
4960  *              indx                            fd to dup to
4961  *              dfd                             fd to dup from
4962  *              mode                            mode to set on new fd
4963  *              error                           command code
4964  *
4965  * Returns:     0                               Success
4966  *              EBADF                           Source fd is bad
4967  *              EACCES                          Requested mode not allowed
4968  *              !0                              'error', if not ENODEV or
4969  *                                              ENXIO
4970  *
4971  * Notes:       XXX This is not thread safe; see fdopen() above
4972  */
4973 int
4974 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
4975 {
4976         struct fileproc *wfp;
4977         struct fileproc *fp;
4978 #if CONFIG_MACF
4979         int myerror;
4980 #endif
4981         proc_t p = current_proc();
4982
4983         /*
4984          * If the to-be-dup'd fd number is greater than the allowed number
4985          * of file descriptors, or the fd to be dup'd has already been
4986          * closed, reject.  Note, check for new == old is necessary as
4987          * falloc could allocate an already closed to-be-dup'd descriptor
4988          * as the new descriptor.
4989          */
4990         proc_fdlock(p);
4991
4992         fp = fdp->fd_ofiles[indx];
4993         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
4994                         (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
4995                         (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
4996
4997                 proc_fdunlock(p);
4998                 return (EBADF);
4999         }
5000 #if CONFIG_MACF
5001         myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5002         if (myerror) {
5003                 proc_fdunlock(p);
5004                 return (myerror);
5005         }
5006 #endif
5007         /*
5008          * There are two cases of interest here.
5009          *
5010          * For ENODEV simply dup (dfd) to file descriptor
5011          * (indx) and return.
5012          *
5013          * For ENXIO steal away the file structure from (dfd) and
5014          * store it in (indx).  (dfd) is effectively closed by
5015          * this operation.
5016          *
5017          * Any other error code is just returned.
5018          */
5019         switch (error) {
5020         case ENODEV:
5021                 /*
5022                  * Check that the mode the file is being opened for is a
5023                  * subset of the mode of the existing descriptor.
5024                  */
5025                 if (((flags & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5026                         proc_fdunlock(p);
5027                         return (EACCES);
5028                 }
5029                 if (indx > fdp->fd_lastfile)
5030                         fdp->fd_lastfile = indx;
5031                 (void)fg_ref(wfp);
5032
5033                 if (fp->f_fglob)
5034                         fg_free(fp->f_fglob);
5035                 fp->f_fglob = wfp->f_fglob;
5036
5037                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5038                         (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5039
5040                 proc_fdunlock(p);
5041                 return (0);
5042
5043         default:
5044                 proc_fdunlock(p);
5045                 return (error);
5046         }
5047         /* NOTREACHED */
5048 }
5049
5050
5051 /*
5052  * fg_ref
5053  *
5054  * Description: Add a reference to a fileglob by fileproc
5055  *
5056  * Parameters:  fp                              fileproc containing fileglob
5057  *                                              pointer
5058  *
5059  * Returns:     void
5060  *
5061  * Notes:       XXX Should use OSAddAtomic?
5062  */
5063 void
5064 fg_ref(struct fileproc * fp)
5065 {
5066         struct fileglob *fg;
5067
5068         fg = fp->f_fglob;
5069
5070         lck_mtx_lock_spin(&fg->fg_lock);
5071
5072 #if DIAGNOSTIC
5073         if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
5074                 panic("fg_ref: invalid bits on fp %p", fp);
5075
5076         if (fg->fg_count == 0)
5077                 panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5078                     fp, fg);
5079 #endif
5080         fg->fg_count++;
5081         lck_mtx_unlock(&fg->fg_lock);
5082 }
5083
5084
5085 /*
5086  * fg_drop
5087  *
5088  * Description: Remove a reference to a fileglob by fileproc
5089  *
5090  * Parameters:  fp                              fileproc containing fileglob
5091  *                                              pointer
5092  *
5093  * Returns:     void
5094  *
5095  * Notes:       XXX Should use OSAddAtomic?
5096  */
5097 void
5098 fg_drop(struct fileproc * fp)
5099 {
5100         struct fileglob *fg;
5101
5102         fg = fp->f_fglob;
5103         lck_mtx_lock_spin(&fg->fg_lock);
5104         fg->fg_count--;
5105         lck_mtx_unlock(&fg->fg_lock);
5106 }
5107
5108
5109 /*
5110  * fg_insertuipc
5111  *
5112  * Description: Insert fileglob onto message queue
5113  *
5114  * Parameters:  fg                              Fileglob pointer to insert
5115  *
5116  * Returns:     void
5117  *
5118  * Locks:       Takes and drops fg_lock, potentially many times
5119  */
5120 void
5121 fg_insertuipc(struct fileglob * fg)
5122 {
5123         int insertque = 0;
5124
5125         lck_mtx_lock_spin(&fg->fg_lock);
5126
5127         while (fg->fg_lflags & FG_RMMSGQ) {
5128                 lck_mtx_convert_spin(&fg->fg_lock);
5129
5130                 fg->fg_lflags |= FG_WRMMSGQ;
5131                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
5132         }
5133
5134         fg->fg_count++;
5135         fg->fg_msgcount++;
5136         if (fg->fg_msgcount == 1) {
5137                 fg->fg_lflags |= FG_INSMSGQ;
5138                 insertque=1;
5139         }
5140         lck_mtx_unlock(&fg->fg_lock);
5141
5142         if (insertque) {
5143                 lck_mtx_lock_spin(uipc_lock);
5144                 unp_gc_wait();
5145                 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
5146                 lck_mtx_unlock(uipc_lock);
5147                 lck_mtx_lock(&fg->fg_lock);
5148                 fg->fg_lflags &= ~FG_INSMSGQ;
5149                 if (fg->fg_lflags & FG_WINSMSGQ) {
5150                         fg->fg_lflags &= ~FG_WINSMSGQ;
5151                         wakeup(&fg->fg_lflags);
5152                 }
5153                 lck_mtx_unlock(&fg->fg_lock);
5154         }
5155
5156 }
5157
5158
5159 /*
5160  * fg_removeuipc
5161  *
5162  * Description: Remove fileglob from message queue
5163  *
5164  * Parameters:  fg                              Fileglob pointer to remove
5165  *
5166  * Returns:     void
5167  *
5168  * Locks:       Takes and drops fg_lock, potentially many times
5169  */
5170 void
5171 fg_removeuipc(struct fileglob * fg)
5172 {
5173         int removeque = 0;
5174
5175         lck_mtx_lock_spin(&fg->fg_lock);
5176         while (fg->fg_lflags & FG_INSMSGQ) {
5177                 lck_mtx_convert_spin(&fg->fg_lock);
5178
5179                 fg->fg_lflags |= FG_WINSMSGQ;
5180                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
5181         }
5182         fg->fg_msgcount--;
5183         if (fg->fg_msgcount == 0) {
5184                 fg->fg_lflags |= FG_RMMSGQ;
5185                 removeque=1;
5186         }
5187         lck_mtx_unlock(&fg->fg_lock);
5188
5189         if (removeque) {
5190                 lck_mtx_lock_spin(uipc_lock);
5191                 unp_gc_wait();
5192                 LIST_REMOVE(fg, f_msglist);
5193                 lck_mtx_unlock(uipc_lock);
5194                 lck_mtx_lock(&fg->fg_lock);
5195                 fg->fg_lflags &= ~FG_RMMSGQ;
5196                 if (fg->fg_lflags & FG_WRMMSGQ) {
5197                         fg->fg_lflags &= ~FG_WRMMSGQ;
5198                         wakeup(&fg->fg_lflags);
5199                 }
5200                 lck_mtx_unlock(&fg->fg_lock);
5201         }
5202 }
5203
5204
5205 /*
5206  * fo_read
5207  *
5208  * Description: Generic fileops read indirected through the fileops pointer
5209  *              in the fileproc structure
5210  *
5211  * Parameters:  fp                              fileproc structure pointer
5212  *              uio                             user I/O structure pointer
5213  *              flags                           FOF_ flags
5214  *              ctx                             VFS context for operation
5215  *
5216  * Returns:     0                               Success
5217  *              !0                              Errno from read
5218  */
5219 int
5220 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5221 {
5222         return ((*fp->f_ops->fo_read)(fp, uio, flags, ctx));
5223 }
5224
5225
5226 /*
5227  * fo_write
5228  *
5229  * Description: Generic fileops write indirected through the fileops pointer
5230  *              in the fileproc structure
5231  *
5232  * Parameters:  fp                              fileproc structure pointer
5233  *              uio                             user I/O structure pointer
5234  *              flags                           FOF_ flags
5235  *              ctx                             VFS context for operation
5236  *
5237  * Returns:     0                               Success
5238  *              !0                              Errno from write
5239  */
5240 int
5241 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5242 {
5243         return((*fp->f_ops->fo_write)(fp, uio, flags, ctx));
5244 }
5245
5246
5247 /*
5248  * fo_ioctl
5249  *
5250  * Description: Generic fileops ioctl indirected through the fileops pointer
5251  *              in the fileproc structure
5252  *
5253  * Parameters:  fp                              fileproc structure pointer
5254  *              com                             ioctl command
5255  *              data                            pointer to internalized copy
5256  *                                              of user space ioctl command
5257  *                                              parameter data in kernel space
5258  *              ctx                             VFS context for operation
5259  *
5260  * Returns:     0                               Success
5261  *              !0                              Errno from ioctl
5262  *
5263  * Locks:       The caller is assumed to have held the proc_fdlock; this
5264  *              function releases and reacquires this lock.  If the caller
5265  *              accesses data protected by this lock prior to calling this
5266  *              function, it will need to revalidate/reacquire any cached
5267  *              protected data obtained prior to the call.
5268  */
5269 int
5270 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5271 {
5272         int error;
5273
5274         proc_fdunlock(vfs_context_proc(ctx));
5275         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5276         proc_fdlock(vfs_context_proc(ctx));
5277         return(error);
5278 }
5279
5280
5281 /*
5282  * fo_select
5283  *
5284  * Description: Generic fileops select indirected through the fileops pointer
5285  *              in the fileproc structure
5286  *
5287  * Parameters:  fp                              fileproc structure pointer
5288  *              which                           select which
5289  *              wql                             pointer to wait queue list
5290  *              ctx                             VFS context for operation
5291  *
5292  * Returns:     0                               Success
5293  *              !0                              Errno from select
5294  */
5295 int
5296 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5297 {
5298         return((*fp->f_ops->fo_select)(fp, which, wql, ctx));
5299 }
5300
5301
5302 /*
5303  * fo_close
5304  *
5305  * Description: Generic fileops close indirected through the fileops pointer
5306  *              in the fileproc structure
5307  *
5308  * Parameters:  fp                              fileproc structure pointer for
5309  *                                              file to close
5310  *              ctx                             VFS context for operation
5311  *
5312  * Returns:     0                               Success
5313  *              !0                              Errno from close
5314  */
5315 int
5316 fo_close(struct fileglob *fg, vfs_context_t ctx)
5317 {
5318         return((*fg->fg_ops->fo_close)(fg, ctx));
5319 }
5320
5321
5322 /*
5323  * fo_kqfilter
5324  *
5325  * Description: Generic fileops kqueue filter indirected through the fileops
5326  *              pointer in the fileproc structure
5327  *
5328  * Parameters:  fp                              fileproc structure pointer
5329  *              kn                              pointer to knote to filter on
5330  *              ctx                             VFS context for operation
5331  *
5332  * Returns:     0                               Success
5333  *              !0                              Errno from kqueue filter
5334  */
5335 int
5336 fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
5337 {
5338         return ((*fp->f_ops->fo_kqfilter)(fp, kn, ctx));
5339 }
5340
5341 /*
5342  * The ability to send a file descriptor to another
5343  * process is opt-in by file type.
5344  */
5345 boolean_t
5346 filetype_issendable(file_type_t fdtype)
5347 {
5348         switch (fdtype) {
5349                 case DTYPE_VNODE:
5350                 case DTYPE_SOCKET:
5351                 case DTYPE_PIPE:
5352                 case DTYPE_PSXSHM:
5353                         return TRUE;
5354                 default:
5355                         /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
5356                         return FALSE;
5357         }
5358 }