bsd/kern/kern_descrip.c

   1 /*
   2  * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/vnode_internal.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/file_internal.h>
  83 #include <sys/guarded.h>
  84 #include <sys/socket.h>
  85 #include <sys/socketvar.h>
  86 #include <sys/stat.h>
  87 #include <sys/ioctl.h>
  88 #include <sys/fcntl.h>
  89 #include <sys/malloc.h>
  90 #include <sys/mman.h>
  91 #include <sys/syslog.h>
  92 #include <sys/unistd.h>
  93 #include <sys/resourcevar.h>
  94 #include <sys/aio_kern.h>
  95 #include <sys/ev.h>
  96 #include <kern/locks.h>
  97 #include <sys/uio_internal.h>
  98 #include <sys/codesign.h>
  99 #include <sys/codedir_internal.h>
 100
 101 #include <security/audit/audit.h>
 102
 103 #include <sys/mount_internal.h>
 104 #include <sys/kdebug.h>
 105 #include <sys/sysproto.h>
 106 #include <sys/pipe.h>
 107 #include <sys/spawn.h>
 108 #include <kern/kern_types.h>
 109 #include <kern/kalloc.h>
 110 #include <libkern/OSAtomic.h>
 111
 112 #include <sys/ubc_internal.h>
 113
 114 #include <kern/ipc_misc.h>
 115 #include <vm/vm_protos.h>
 116
 117 #include <mach/mach_port.h>
 118 #include <stdbool.h>
 119
 120 #if CONFIG_PROTECT
 121 #include <sys/cprotect.h>
 122 #endif
 123 #include <hfs/hfs.h>
 124
 125 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
 126     mach_msg_type_name_t, ipc_port_t *);
 127 void ipc_port_release_send(ipc_port_t);
 128
 129 struct psemnode;
 130 struct pshmnode;
 131
 132 static int finishdup(proc_t p,
 133     struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
 134
 135 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
 136 void fg_drop(struct fileproc * fp);
 137 void fg_free(struct fileglob *fg);
 138 void fg_ref(struct fileproc * fp);
 139 void fileport_releasefg(struct fileglob *fg);
 140
 141 /* flags for close_internal_locked */
 142 #define FD_DUP2RESV 1
 143
 144 /* We don't want these exported */
 145
 146 __private_extern__
 147 int unlink1(vfs_context_t, struct nameidata *, int);
 148
 149 static void _fdrelse(struct proc * p, int fd);
 150
 151
 152 extern void file_lock_init(void);
 153
 154 extern kauth_scope_t    kauth_scope_fileop;
 155
 156 /* Conflict wait queue for when selects collide (opaque type) */
 157 extern struct wait_queue select_conflict_queue;
 158
  /*
   * Shorthand member names: used as fp->f_xxx, each one expands to the
   * matching field reached through the fileproc's f_fglob pointer (f_type
   * goes one step further, through fg_ops).
   */
  159 #define f_flag f_fglob->fg_flag
  160 #define f_type f_fglob->fg_ops->fo_type
  161 #define f_msgcount f_fglob->fg_msgcount
  162 #define f_cred f_fglob->fg_cred
  163 #define f_ops f_fglob->fg_ops
  164 #define f_offset f_fglob->fg_offset
  165 #define f_data f_fglob->fg_data
  /*
   * CHECK_ADD_OVERFLOW_INT64L(x, y)
   *
   * Evaluates to 1 when x + y would leave the [LLONG_MIN, LLONG_MAX] range
   * (both operands positive and the sum too large, or both negative and the
   * sum too small), and to 0 otherwise.  The test is done by comparing
   * against LLONG_MAX - y / LLONG_MIN - y, so the sum itself is never
   * computed.  Both arguments are evaluated more than once: do not pass
   * expressions with side effects.
   */
  166 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
  167                 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
  168                 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
  169                 ? 1 : 0)
 170 /*
 171  * Descriptor management.
 172  */
 173 struct fmsglist fmsghead;       /* head of list of open files */
 174 struct fmsglist fmsg_ithead;    /* head of list of open files */
 175 int nfiles;                     /* actual number of open files */
 176
 177
 178 lck_grp_attr_t * file_lck_grp_attr;
 179 lck_grp_t * file_lck_grp;
 180 lck_attr_t * file_lck_attr;
 181
 182 lck_mtx_t * uipc_lock;
 183
 184
 185 /*
 186  * check_file_seek_range
 187  *
 188  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
 189  *
 190  * Parameters:  fl              Flock structure.
 191  *              cur_file_offset Current offset in the file.
 192  *
 193  * Returns:     0               on Success.
 194  *              EOVERFLOW       on overflow.
 195  *              EINVAL          on offset less than zero.
 196  */
 197
 198 static int
 199 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
 200 {
 201         if (fl->l_whence == SEEK_CUR) {
 202                 /* Check if the start marker is beyond LLONG_MAX. */
 203                 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
 204                         /* Check if start marker is negative */
 205                         if (fl->l_start < 0) {
 206                                 return EINVAL;
 207                         }
 208                         return EOVERFLOW;
 209                 }
 210                 /* Check if the start marker is negative. */
 211                 if (fl->l_start + cur_file_offset < 0) {
 212                         return EINVAL;
 213                 }
 214                 /* Check if end marker is beyond LLONG_MAX. */
 215                 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
 216                         cur_file_offset, fl->l_len - 1))) {
 217                         return EOVERFLOW;
 218                 }
 219                 /* Check if the end marker is negative. */
 220                 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
 221                         fl->l_len < 0)) {
 222                         return EINVAL;
 223                 }
 224         } else if (fl->l_whence == SEEK_SET) {
 225                 /* Check if the start marker is negative. */
 226                 if (fl->l_start < 0) {
 227                         return EINVAL;
 228                 }
 229                 /* Check if the end marker is beyond LLONG_MAX. */
 230                 if ((fl->l_len > 0) &&
 231                     CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
 232                         return EOVERFLOW;
 233                 }
 234                 /* Check if the end marker is negative. */
 235                 if ((fl->l_len < 0) &&  fl->l_start + fl->l_len < 0) {
 236                         return EINVAL;
 237                 }
 238         }
 239         return 0;
 240 }
 241
 242
 243 /*
 244  * file_lock_init
 245  *
 246  * Description: Initialize the file lock group and the uipc and flist locks
 247  *
 248  * Parameters:  (void)
 249  *
 250  * Returns:     void
 251  *
 252  * Notes:       Called at system startup from bsd_init().
 253  */
 254 void
 255 file_lock_init(void)
 256 {
 257         /* allocate file lock group attribute and group */
 258         file_lck_grp_attr= lck_grp_attr_alloc_init();
 259
 260         file_lck_grp = lck_grp_alloc_init("file",  file_lck_grp_attr);
 261
 262         /* Allocate file lock attribute */
 263         file_lck_attr = lck_attr_alloc_init();
 264
 265         uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
 266 }
 267
 268
 269 /*
 270  * proc_fdlock, proc_fdlock_spin
 271  *
 272  * Description: Lock to control access to the per process struct fileproc
 273  *              and struct filedesc
 274  *
 275  * Parameters:  p                               Process to take the lock on
 276  *
 277  * Returns:     void
 278  *
 279  * Notes:       The lock is initialized in forkproc() and destroyed in
 280  *              reap_child_process().
 281  */
 282 void
 283 proc_fdlock(proc_t p)
 284 {
 285         lck_mtx_lock(&p->p_fdmlock);
 286 }
 287
 288 void
 289 proc_fdlock_spin(proc_t p)
 290 {
 291         lck_mtx_lock_spin(&p->p_fdmlock);
 292 }
 293
 294 void
 295 proc_fdlock_assert(proc_t p, int assertflags)
 296 {
 297         lck_mtx_assert(&p->p_fdmlock, assertflags);
 298 }
 299
 300
 301 /*
 302  * proc_fdunlock
 303  *
 304  * Description: Unlock the lock previously locked by a call to proc_fdlock()
 305  *
 306  * Parameters:  p                               Process to drop the lock on
 307  *
 308  * Returns:     void
 309  */
 310 void
 311 proc_fdunlock(proc_t p)
 312 {
 313         lck_mtx_unlock(&p->p_fdmlock);
 314 }
 315
 316
 317 /*
 318  * System calls on descriptors.
 319  */
 320
 321
 322 /*
 323  * getdtablesize
 324  *
 325  * Description: Returns the per process maximum size of the descriptor table
 326  *
 327  * Parameters:  p                               Process being queried
 328  *              retval                          Pointer to the call return area
 329  *
 330  * Returns:     0                               Success
 331  *
 332  * Implicit returns:
 333  *              *retval (modified)              Size of dtable
 334  */
 335 int
 336 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
 337 {
 338         proc_fdlock_spin(p);
 339         *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 340         proc_fdunlock(p);
 341
 342         return (0);
 343 }
 344
 345
 346 void
 347 procfdtbl_reservefd(struct proc * p, int fd)
 348 {
 349         p->p_fd->fd_ofiles[fd] = NULL;
 350         p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
 351 }
 352
 353 void
 354 procfdtbl_markclosefd(struct proc * p, int fd)
 355 {
 356         p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
 357 }
 358
 359 void
 360 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
 361 {
 362         if (fp != NULL)
 363                 p->p_fd->fd_ofiles[fd] = fp;
 364         p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
 365         if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
 366                 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
 367                 wakeup(&p->p_fd);
 368         }
 369 }
 370
 371 void
 372 procfdtbl_waitfd(struct proc * p, int fd)
 373 {
 374         p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
 375         msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
 376 }
 377
 378
 379 void
 380 procfdtbl_clearfd(struct proc * p, int fd)
 381 {
 382         int waiting;
 383
 384         waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
 385         p->p_fd->fd_ofiles[fd] = NULL;
 386         p->p_fd->fd_ofileflags[fd] = 0;
 387         if ( waiting == UF_RESVWAIT) {
 388                 wakeup(&p->p_fd);
 389         }
 390 }
 391
 392 /*
 393  * _fdrelse
 394  *
 395  * Description: Inline utility function to free an fd in a filedesc
 396  *
 397  * Parameters:  fdp                             Pointer to filedesc fd lies in
 398  *              fd                              fd to free
 399  *              reserv                          fd should be reserved
 400  *
 401  * Returns:     void
 402  *
 403  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
 404  *              the caller
 405  */
 406 static void
 407 _fdrelse(struct proc * p, int fd)
 408 {
 409         struct filedesc *fdp = p->p_fd;
 410         int nfd = 0;
 411
 412         if (fd < fdp->fd_freefile)
 413                 fdp->fd_freefile = fd;
 414 #if DIAGNOSTIC
 415         if (fd > fdp->fd_lastfile)
 416                 panic("fdrelse: fd_lastfile inconsistent");
 417 #endif
 418         procfdtbl_clearfd(p, fd);
 419
 420         while ((nfd = fdp->fd_lastfile) > 0 &&
 421                         fdp->fd_ofiles[nfd] == NULL &&
 422                         !(fdp->fd_ofileflags[nfd] & UF_RESERVED))
 423                 fdp->fd_lastfile--;
 424 }
 425
 426
 427 int
 428 fd_rdwr(
 429         int fd,
 430         enum uio_rw rw,
 431         uint64_t base,
 432         int64_t len,
 433         enum uio_seg segflg,
 434         off_t   offset,
 435         int     io_flg,
 436         int64_t *aresid)
 437 {
 438         struct fileproc *fp;
 439         proc_t  p;
 440         int error = 0;
 441         int flags = 0;
 442         int spacetype;
 443         uio_t auio = NULL;
 444         char uio_buf[ UIO_SIZEOF(1) ];
 445         struct vfs_context context = *(vfs_context_current());
 446         bool wrote_some = false;
 447
 448         p = current_proc();
 449
 450         error = fp_lookup(p, fd, &fp, 0);
 451         if (error)
 452                 return(error);
 453
 454         if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
 455                 error = EINVAL;
 456                 goto out;
 457         }
 458         if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
 459                 error = EBADF;
 460                 goto out;
 461         }
 462
 463         if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
 464                 error = EBADF;
 465                 goto out;
 466         }
 467
 468         context.vc_ucred = fp->f_fglob->fg_cred;
 469
 470         if (UIO_SEG_IS_USER_SPACE(segflg))
 471                 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
 472         else
 473                 spacetype = UIO_SYSSPACE;
 474
 475         auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
 476
 477         uio_addiov(auio, base, len);
 478
 479         if ( !(io_flg & IO_APPEND))
 480                 flags = FOF_OFFSET;
 481
 482         if (rw == UIO_WRITE) {
 483                 user_ssize_t orig_resid = uio_resid(auio);
 484                 error = fo_write(fp, auio, flags, &context);
 485                 wrote_some = uio_resid(auio) < orig_resid;
 486         } else
 487                 error = fo_read(fp, auio, flags, &context);
 488
 489         if (aresid)
 490                 *aresid = uio_resid(auio);
 491         else {
 492                 if (uio_resid(auio) && error == 0)
 493                         error = EIO;
 494         }
 495 out:
 496         if (wrote_some)
 497                 fp_drop_written(p, fd, fp);
 498         else
 499                 fp_drop(p, fd, fp, 0);
 500
 501         return error;
 502 }
 503
 504
 505
 506 /*
 507  * dup
 508  *
 509  * Description: Duplicate a file descriptor.
 510  *
 511  * Parameters:  p                               Process performing the dup
 512  *              uap->fd                         The fd to dup
 513  *              retval                          Pointer to the call return area
 514  *
 515  * Returns:     0                               Success
 516  *              !0                              Errno
 517  *
 518  * Implicit returns:
 519  *              *retval (modified)              The new descriptor
 520  */
 521 int
 522 dup(proc_t p, struct dup_args *uap, int32_t *retval)
 523 {
 524         struct filedesc *fdp = p->p_fd;
 525         int old = uap->fd;
 526         int new, error;
 527         struct fileproc *fp;
 528
 529         proc_fdlock(p);
 530         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
 531                 proc_fdunlock(p);
 532                 return(error);
 533         }
 534         if (FP_ISGUARDED(fp, GUARD_DUP)) {
 535                 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
 536                 (void) fp_drop(p, old, fp, 1);
 537                 proc_fdunlock(p);
 538                 return (error);
 539         }
 540         if ( (error = fdalloc(p, 0, &new)) ) {
 541                 fp_drop(p, old, fp, 1);
 542                 proc_fdunlock(p);
 543                 return (error);
 544         }
 545         error = finishdup(p, fdp, old, new, 0, retval);
 546         fp_drop(p, old, fp, 1);
 547         proc_fdunlock(p);
 548
 549         return (error);
 550 }
 551
 552 /*
 553  * dup2
 554  *
 555  * Description: Duplicate a file descriptor to a particular value.
 556  *
 557  * Parameters:  p                               Process performing the dup
 558  *              uap->from                       The fd to dup
 559  *              uap->to                         The fd to dup it to
 560  *              retval                          Pointer to the call return area
 561  *
 562  * Returns:     0                               Success
 563  *              !0                              Errno
 564  *
 565  * Implicit returns:
 566  *              *retval (modified)              The new descriptor
 567  */
 568 int
 569 dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
 570 {
 571         struct filedesc *fdp = p->p_fd;
 572         int old = uap->from, new = uap->to;
 573         int i, error;
 574         struct fileproc *fp, *nfp;
 575
 576         proc_fdlock(p);
 577
 578 startover:
 579         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
 580                 proc_fdunlock(p);
 581                 return(error);
 582         }
 583         if (FP_ISGUARDED(fp, GUARD_DUP)) {
 584                 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
 585                 (void) fp_drop(p, old, fp, 1);
 586                 proc_fdunlock(p);
 587                 return (error);
 588         }
 589         if (new < 0 ||
 590                 (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 591             new >= maxfiles) {
 592                 fp_drop(p, old, fp, 1);
 593                 proc_fdunlock(p);
 594                 return (EBADF);
 595         }
 596         if (old == new) {
 597                 fp_drop(p, old, fp, 1);
 598                 *retval = new;
 599                 proc_fdunlock(p);
 600                 return (0);
 601         }
 602         if (new < 0 || new >= fdp->fd_nfiles) {
 603                 if ( (error = fdalloc(p, new, &i)) ) {
 604                         fp_drop(p, old, fp, 1);
 605                         proc_fdunlock(p);
 606                         return (error);
 607                 }
 608                 if (new != i) {
 609                         fdrelse(p, i);
 610                         goto closeit;
 611                 }
 612         } else {
 613 closeit:
 614                 while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED)  {
 615                                 fp_drop(p, old, fp, 1);
 616                                 procfdtbl_waitfd(p, new);
 617 #if DIAGNOSTIC
 618                                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 619 #endif
 620                                 goto startover;
 621                 }
 622
 623                 if ((fdp->fd_ofiles[new] != NULL) &&
 624                     ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
 625                         fp_drop(p, old, fp, 1);
 626                         if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
 627                                 error = fp_guard_exception(p,
 628                                     new, nfp, kGUARD_EXC_CLOSE);
 629                                 (void) fp_drop(p, new, nfp, 1);
 630                                 proc_fdunlock(p);
 631                                 return (error);
 632                         }
 633                         (void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
 634 #if DIAGNOSTIC
 635                         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 636 #endif
 637                         procfdtbl_clearfd(p, new);
 638                         goto startover;
 639                 } else  {
 640 #if DIAGNOSTIC
 641                         if (fdp->fd_ofiles[new] != NULL)
 642                                 panic("dup2: no ref on fileproc %d", new);
 643 #endif
 644                         procfdtbl_reservefd(p, new);
 645                 }
 646
 647 #if DIAGNOSTIC
 648                 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 649 #endif
 650
 651         }
 652 #if DIAGNOSTIC
 653         if (fdp->fd_ofiles[new] != 0)
 654                 panic("dup2: overwriting fd_ofiles with new %d", new);
 655         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
 656                 panic("dup2: unreserved fileflags with new %d", new);
 657 #endif
 658         error = finishdup(p, fdp, old, new, 0, retval);
 659         fp_drop(p, old, fp, 1);
 660         proc_fdunlock(p);
 661
 662         return(error);
 663 }
 664
 665
 666 /*
 667  * fcntl
 668  *
 669  * Description: The file control system call.
 670  *
 671  * Parameters:  p                               Process performing the fcntl
 672  *              uap->fd                         The fd to operate against
 673  *              uap->cmd                        The command to perform
 674  *              uap->arg                        Pointer to the command argument
 675  *              retval                          Pointer to the call return area
 676  *
 677  * Returns:     0                               Success
 678  *              !0                              Errno (see fcntl_nocancel)
 679  *
 680  * Implicit returns:
 681  *              *retval (modified)              fcntl return value (if any)
 682  *
 683  * Notes:       This system call differs from fcntl_nocancel() in that it
 684  *              tests for cancellation prior to performing a potentially
 685  *              blocking operation.
 686  */
 687 int
 688 fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
 689 {
 690         __pthread_testcancel(1);
 691         return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval));
 692 }
 693
 694
 695 /*
 696  * fcntl_nocancel
 697  *
 698  * Description: A non-cancel-testing file control system call.
 699  *
 700  * Parameters:  p                               Process performing the fcntl
 701  *              uap->fd                         The fd to operate against
 702  *              uap->cmd                        The command to perform
 703  *              uap->arg                        Pointer to the command argument
 704  *              retval                          Pointer to the call return area
 705  *
 706  * Returns:     0                               Success
 707  *              EINVAL
 708  *      fp_lookup:EBADF                         Bad file descriptor
 709  * [F_DUPFD]
 710  *      fdalloc:EMFILE
 711  *      fdalloc:ENOMEM
 712  *      finishdup:EBADF
 713  *      finishdup:ENOMEM
 714  * [F_SETOWN]
 715  *              ESRCH
 716  * [F_SETLK]
 717  *              EBADF
 718  *              EOVERFLOW
 719  *      copyin:EFAULT
 720  *      vnode_getwithref:???
 721  *      VNOP_ADVLOCK:???
 722  *      msleep:ETIMEDOUT
 723  * [F_GETLK]
 724  *              EBADF
 725  *              EOVERFLOW
 726  *      copyin:EFAULT
 727  *      copyout:EFAULT
 728  *      vnode_getwithref:???
 729  *      VNOP_ADVLOCK:???
 730  * [F_PREALLOCATE]
 731  *              EBADF
 732  *              EINVAL
 733  *      copyin:EFAULT
 734  *      copyout:EFAULT
 735  *      vnode_getwithref:???
 736  *      VNOP_ALLOCATE:???
 737  * [F_SETSIZE,F_RDADVISE]
 738  *              EBADF
 739  *      copyin:EFAULT
 740  *      vnode_getwithref:???
 741  * [F_RDAHEAD,F_NOCACHE]
 742  *              EBADF
 743  *      vnode_getwithref:???
 744  * [???]
 745  *
 746  * Implicit returns:
 747  *              *retval (modified)              fcntl return value (if any)
 748  */
 749 int
 750 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 751 {
 752         int fd = uap->fd;
 753         struct filedesc *fdp = p->p_fd;
 754         struct fileproc *fp;
 755         char *pop;
 756         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
 757         int i, tmp, error, error2, flg = F_POSIX;
 758         struct flock fl;
 759         struct flocktimeout fltimeout;
 760         struct timespec *timeout = NULL;
 761         struct vfs_context context;
 762         off_t offset;
 763         int newmin;
 764         daddr64_t lbn, bn;
 765         unsigned int fflag;
 766         user_addr_t argp;
 767         boolean_t is64bit;
 768
 769         AUDIT_ARG(fd, uap->fd);
 770         AUDIT_ARG(cmd, uap->cmd);
 771
 772         proc_fdlock(p);
 773         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 774                 proc_fdunlock(p);
 775                 return(error);
 776         }
 777         context.vc_thread = current_thread();
 778         context.vc_ucred = fp->f_cred;
 779
 780         is64bit = proc_is64bit(p);
 781         if (is64bit) {
 782                 argp = uap->arg;
 783         }
 784         else {
 785                 /*
 786                  * Since the arg parameter is defined as a long but may be
 787                  * either a long or a pointer we must take care to handle
 788                  * sign extension issues.  Our sys call munger will sign
 789                  * extend a long when we are called from a 32-bit process.
 790                  * Since we can never have an address greater than 32-bits
 791                  * from a 32-bit process we lop off the top 32-bits to avoid
 792                  * getting the wrong address
 793                  */
 794                 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
 795         }
 796
 797         pop = &fdp->fd_ofileflags[fd];
 798
 799 #if CONFIG_MACF
 800         error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
 801             uap->arg);
 802         if (error)
 803                 goto out;
 804 #endif
 805
 806         switch (uap->cmd) {
 807
 808         case F_DUPFD:
 809         case F_DUPFD_CLOEXEC:
 810                 if (FP_ISGUARDED(fp, GUARD_DUP)) {
 811                         error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
 812                         goto out;
 813                 }
 814                 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 815                 AUDIT_ARG(value32, newmin);
 816                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 817                     newmin >= maxfiles) {
 818                         error = EINVAL;
 819                         goto out;
 820                 }
 821                 if ( (error = fdalloc(p, newmin, &i)) )
 822                         goto out;
 823                 error = finishdup(p, fdp, fd, i,
 824                     uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
 825                 goto out;
 826
 827         case F_GETFD:
 828                 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
 829                 error = 0;
 830                 goto out;
 831
 832         case F_SETFD:
 833                 AUDIT_ARG(value32, uap->arg);
 834                 if (uap->arg & FD_CLOEXEC)
 835                         *pop |= UF_EXCLOSE;
 836                 else {
 837                         if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
 838                                 error = fp_guard_exception(p,
 839                                     fd, fp, kGUARD_EXC_NOCLOEXEC);
 840                                 goto out;
 841                         }
 842                         *pop &= ~UF_EXCLOSE;
 843                 }
 844                 error = 0;
 845                 goto out;
 846
 847         case F_GETFL:
 848                 *retval = OFLAGS(fp->f_flag);
 849                 error = 0;
 850                 goto out;
 851
 852         case F_SETFL:
 853                 fp->f_flag &= ~FCNTLFLAGS;
 854                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 855                 AUDIT_ARG(value32, tmp);
 856                 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
 857                 tmp = fp->f_flag & FNONBLOCK;
 858                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 859                 if (error)
 860                         goto out;
 861                 tmp = fp->f_flag & FASYNC;
 862                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
 863                 if (!error)
 864                         goto out;
 865                 fp->f_flag &= ~FNONBLOCK;
 866                 tmp = 0;
 867                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 868                 goto out;
 869
 870         case F_GETOWN:
                     /*
                      * Report the owner id of the descriptor through retval.
                      * Sockets return the stored so_pgid as-is; every other file
                      * type is asked via TIOCGPGRP and the answer is negated.
                      */
 871                 if (fp->f_type == DTYPE_SOCKET) {
 872                         *retval = ((struct socket *)fp->f_data)->so_pgid;
 873                         error = 0;
 874                         goto out;
 875                 }
 876                 error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
                     /*
                      * NOTE(review): the negation runs even when fo_ioctl() failed;
                      * presumably harmless because retval is ignored on error --
                      * confirm against the syscall return path.
                      */
 877                 *retval = -*retval;
 878                 goto out;
 879
 880         case F_SETOWN:
                     /*
                      * Record the owner id given in arg for this descriptor.
                      *  - sockets: stored directly in so_pgid;
                      *  - pipes:   passed unchanged to TIOCSPGRP;
                      *  - others:  converted first (see below), then TIOCSPGRP.
                      */
 881                 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
 882                 AUDIT_ARG(value32, tmp);
 883                 if (fp->f_type == DTYPE_SOCKET) {
 884                         ((struct socket *)fp->f_data)->so_pgid = tmp;
 885                         error =0;
 886                         goto out;
 887                 }
 888                 if (fp->f_type == DTYPE_PIPE) {
 889                         error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 890                         goto out;
 891                 }
 892
                     /*
                      * A non-positive arg is negated (presumably a negated process
                      * group id, per the usual F_SETOWN convention -- confirm).
                      * A positive arg is looked up as a pid and replaced by that
                      * process's p_pgrpid; ESRCH if no such process is found.
                      */
 893                 if (tmp <= 0) {
 894                         tmp = -tmp;
 895                 } else {
 896                         proc_t p1 = proc_find(tmp);
 897                         if (p1 == 0) {
 898                                 error = ESRCH;
 899                                 goto out;
 900                         }
 901                         tmp = (int)p1->p_pgrpid;
                             /* Release the reference obtained from proc_find(). */
 902                         proc_rele(p1);
 903                 }
 904                 error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
 905                 goto out;
 906
 907         case F_SETNOSIGPIPE:
                     /*
                      * Turn the no-SIGPIPE setting on (non-zero arg) or off (zero
                      * arg) for this descriptor.  Sockets are handled through the
                      * SO_NOSIGPIPE socket option (EINVAL when built without
                      * SOCKETS); every other file type keeps the setting in
                      * FG_NOSIGPIPE within the fileglob's fg_lflags, updated
                      * under fg_lock.
                      */
 908                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
 909                 if (fp->f_type == DTYPE_SOCKET) {
 910 #if SOCKETS
 911                         error = sock_setsockopt((struct socket *)fp->f_data,
 912                             SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
 913 #else
 914                         error = EINVAL;
 915 #endif
 916                 } else {
 917                         struct fileglob *fg = fp->f_fglob;
 918
 919                         lck_mtx_lock_spin(&fg->fg_lock);
 920                         if (tmp)
 921                                 fg->fg_lflags |= FG_NOSIGPIPE;
 922                         else
                                     /*
                                      * Mask with the complement so that only
                                      * FG_NOSIGPIPE is cleared and the other
                                      * fg_lflags bits are left untouched.
                                      */
 923                                 fg->fg_lflags &= ~FG_NOSIGPIPE;
 924                         lck_mtx_unlock(&fg->fg_lock);
 925                         error = 0;
 926                 }
 927                 goto out;
 928
 929         case F_GETNOSIGPIPE:
 930                 if (fp->f_type == DTYPE_SOCKET) {
 931 #if SOCKETS
 932                         int retsize = sizeof (*retval);
 933                         error = sock_getsockopt((struct socket *)fp->f_data,
 934                             SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
 935 #else
 936                         error = EINVAL;
 937 #endif
 938                 } else {
 939                         *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
 940                                 1 : 0;
 941                         error = 0;
 942                 }
 943                 goto out;
 944
 945         case F_SETLKWTIMEOUT:
 946         case F_SETLKW:
 947                 flg |= F_WAIT;
 948                 /* Fall into F_SETLK */
 949
 950         case F_SETLK:
 951                 if (fp->f_type != DTYPE_VNODE) {
 952                         error = EBADF;
 953                         goto out;
 954                 }
 955                 vp = (struct vnode *)fp->f_data;
 956
 957                 fflag = fp->f_flag;
 958                 offset = fp->f_offset;
 959                 proc_fdunlock(p);
 960
 961                 /* Copy in the lock structure */
 962                 if (uap->cmd == F_SETLKWTIMEOUT) {
 963                         error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
 964                         if (error) {
 965                                 goto outdrop;
 966                         }
 967                         fl = fltimeout.fl;
 968                         timeout = &fltimeout.timeout;
 969                 } else {
 970                         error = copyin(argp, (caddr_t)&fl, sizeof(fl));
 971                         if (error) {
 972                                 goto outdrop;
 973                         }
 974                 }
 975
 976                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
 977                 /* and ending byte for EOVERFLOW in SEEK_SET */
 978                 error = check_file_seek_range(&fl, offset);
 979                 if (error) {
 980                         goto outdrop;
 981                 }
 982
 983                 if ( (error = vnode_getwithref(vp)) ) {
 984                         goto outdrop;
 985                 }
 986                 if (fl.l_whence == SEEK_CUR)
 987                         fl.l_start += offset;
 988
 989 #if CONFIG_MACF
 990                 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
 991                     F_SETLK, &fl);
 992                 if (error) {
 993                         (void)vnode_put(vp);
 994                         goto outdrop;
 995                 }
 996 #endif
 997                 switch (fl.l_type) {
 998
 999                 case F_RDLCK:
1000                         if ((fflag & FREAD) == 0) {
1001                                 (void)vnode_put(vp);
1002                                 error = EBADF;
1003                                 goto outdrop;
1004                         }
1005                         // XXX UInt32 unsafe for LP64 kernel
1006                         OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1007                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout);
1008                         (void)vnode_put(vp);
1009                         goto outdrop;
1010
1011                 case F_WRLCK:
1012                         if ((fflag & FWRITE) == 0) {
1013                                 (void)vnode_put(vp);
1014                                 error = EBADF;
1015                                 goto outdrop;
1016                         }
1017                         // XXX UInt32 unsafe for LP64 kernel
1018                         OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1019                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout);
1020                         (void)vnode_put(vp);
1021                         goto outdrop;
1022
1023                 case F_UNLCK:
1024                         error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
1025                                 F_POSIX, &context, timeout);
1026                         (void)vnode_put(vp);
1027                         goto outdrop;
1028
1029                 default:
1030                         (void)vnode_put(vp);
1031                         error = EINVAL;
1032                         goto outdrop;
1033                 }
1034
1035         case F_GETLK:
1036                 if (fp->f_type != DTYPE_VNODE) {
1037                         error = EBADF;
1038                         goto out;
1039                 }
1040                 vp = (struct vnode *)fp->f_data;
1041
1042                 offset = fp->f_offset;
1043                 proc_fdunlock(p);
1044
1045                 /* Copy in the lock structure */
1046                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1047                 if (error)
1048                         goto outdrop;
1049
1050                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1051                 /* and ending byte for EOVERFLOW in SEEK_SET */
1052                 error = check_file_seek_range(&fl, offset);
1053                 if (error) {
1054                         goto outdrop;
1055                 }
1056
1057                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1058                         error = EINVAL;
1059                         goto outdrop;
1060                 }
1061
1062                 switch (fl.l_type) {
1063                 case F_RDLCK:
1064                 case F_UNLCK:
1065                 case F_WRLCK:
1066                         break;
1067                 default:
1068                         error = EINVAL;
1069                         goto outdrop;
1070                 }
1071
1072                 switch (fl.l_whence) {
1073                 case SEEK_CUR:
1074                 case SEEK_SET:
1075                 case SEEK_END:
1076                         break;
1077                 default:
1078                         error = EINVAL;
1079                         goto outdrop;
1080                 }
1081
1082                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1083                         if (fl.l_whence == SEEK_CUR)
1084                                 fl.l_start += offset;
1085
1086 #if CONFIG_MACF
1087                         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1088                             uap->cmd, &fl);
1089                         if (error == 0)
1090 #endif
1091                         error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context, NULL);
1092
1093                         (void)vnode_put(vp);
1094
1095                         if (error == 0)
1096                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1097                 }
1098                 goto outdrop;
1099
1100         case F_PREALLOCATE: {
1101                 fstore_t alloc_struct;    /* structure for allocate command */
1102                 u_int32_t alloc_flags = 0;
1103
1104                 if (fp->f_type != DTYPE_VNODE) {
1105                         error = EBADF;
1106                         goto out;
1107                 }
1108
1109                 vp = (struct vnode *)fp->f_data;
1110                 proc_fdunlock(p);
1111
1112                 /* make sure that we have write permission */
1113                 if ((fp->f_flag & FWRITE) == 0) {
1114                         error = EBADF;
1115                         goto outdrop;
1116                 }
1117
1118                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1119                 if (error)
1120                         goto outdrop;
1121
1122                 /* now set the space allocated to 0 */
1123                 alloc_struct.fst_bytesalloc = 0;
1124
1125                 /*
1126                  * Do some simple parameter checking
1127                  */
1128
1129                 /* set up the flags */
1130
1131                 alloc_flags |= PREALLOCATE;
1132
1133                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
1134                         alloc_flags |= ALLOCATECONTIG;
1135
1136                 if (alloc_struct.fst_flags & F_ALLOCATEALL)
1137                         alloc_flags |= ALLOCATEALL;
1138
1139                 /*
1140                  * Do any position mode specific stuff.  The only
1141                  * position mode  supported now is PEOFPOSMODE
1142                  */
1143
1144                 switch (alloc_struct.fst_posmode) {
1145
1146                 case F_PEOFPOSMODE:
1147                         if (alloc_struct.fst_offset != 0) {
1148                                 error = EINVAL;
1149                                 goto outdrop;
1150                         }
1151
1152                         alloc_flags |= ALLOCATEFROMPEOF;
1153                         break;
1154
1155                 case F_VOLPOSMODE:
1156                         if (alloc_struct.fst_offset <= 0) {
1157                                 error = EINVAL;
1158                                 goto outdrop;
1159                         }
1160
1161                         alloc_flags |= ALLOCATEFROMVOL;
1162                         break;
1163
1164                 default: {
1165                         error = EINVAL;
1166                         goto outdrop;
1167                         }
1168                 }
1169                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1170                         /*
1171                          * call allocate to get the space
1172                          */
1173                         error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
1174                                               &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1175                                               &context);
1176                         (void)vnode_put(vp);
1177
1178                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1179
1180                         if (error == 0)
1181                                 error = error2;
1182                 }
1183                 goto outdrop;
1184
1185                 }
1186         case F_SETSIZE:
1187                 if (fp->f_type != DTYPE_VNODE) {
1188                         error = EBADF;
1189                         goto out;
1190                 }
1191                 vp = (struct vnode *)fp->f_data;
1192                 proc_fdunlock(p);
1193
1194                 error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
1195                 if (error)
1196                         goto outdrop;
1197                 AUDIT_ARG(value64, offset);
1198
1199                 error = vnode_getwithref(vp);
1200                 if (error)
1201                         goto outdrop;
1202
1203 #if CONFIG_MACF
1204                 error = mac_vnode_check_truncate(&context,
1205                     fp->f_fglob->fg_cred, vp);
1206                 if (error) {
1207                         (void)vnode_put(vp);
1208                         goto outdrop;
1209                 }
1210 #endif
1211                 /*
1212                  * Make sure that we are root.  Growing a file
1213                  * without zero filling the data is a security hole
1214                  * root would have access anyway so we'll allow it
1215                  */
1216                 if (!kauth_cred_issuser(kauth_cred_get())) {
1217                         error = EACCES;
1218                 } else {
1219                         /*
1220                          * set the file size
1221                          */
1222                         error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1223                             &context);
1224                 }
1225
1226                 (void)vnode_put(vp);
1227                 goto outdrop;
1228
1229         case F_RDAHEAD:
1230                 if (fp->f_type != DTYPE_VNODE) {
1231                         error = EBADF;
1232                         goto out;
1233                 }
1234                 if (uap->arg)
1235                         fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1236                 else
1237                         fp->f_fglob->fg_flag |= FNORDAHEAD;
1238
1239                 goto out;
1240
1241         case F_NOCACHE:
1242                 if (fp->f_type != DTYPE_VNODE) {
1243                         error = EBADF;
1244                         goto out;
1245                 }
1246                 if (uap->arg)
1247                         fp->f_fglob->fg_flag |= FNOCACHE;
1248                 else
1249                         fp->f_fglob->fg_flag &= ~FNOCACHE;
1250
1251                 goto out;
1252
1253         case F_NODIRECT:
1254                 if (fp->f_type != DTYPE_VNODE) {
1255                         error = EBADF;
1256                         goto out;
1257                 }
1258                 if (uap->arg)
1259                         fp->f_fglob->fg_flag |= FNODIRECT;
1260                 else
1261                         fp->f_fglob->fg_flag &= ~FNODIRECT;
1262
1263                 goto out;
1264
1265         case F_SINGLE_WRITER:
1266                 if (fp->f_type != DTYPE_VNODE) {
1267                         error = EBADF;
1268                         goto out;
1269                 }
1270                 if (uap->arg)
1271                         fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1272                 else
1273                         fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1274
1275                 goto out;
1276
1277         case F_GLOBAL_NOCACHE:
1278                 if (fp->f_type != DTYPE_VNODE) {
1279                         error = EBADF;
1280                         goto out;
1281                 }
1282                 vp = (struct vnode *)fp->f_data;
1283                 proc_fdunlock(p);
1284
1285                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1286
1287                         *retval = vnode_isnocache(vp);
1288
1289                         if (uap->arg)
1290                                 vnode_setnocache(vp);
1291                         else
1292                                 vnode_clearnocache(vp);
1293
1294                         (void)vnode_put(vp);
1295                 }
1296                 goto outdrop;
1297
1298         case F_CHECK_OPENEVT:
1299                 if (fp->f_type != DTYPE_VNODE) {
1300                         error = EBADF;
1301                         goto out;
1302                 }
1303                 vp = (struct vnode *)fp->f_data;
1304                 proc_fdunlock(p);
1305
1306                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1307
1308                         *retval = vnode_is_openevt(vp);
1309
1310                         if (uap->arg)
1311                                 vnode_set_openevt(vp);
1312                         else
1313                                 vnode_clear_openevt(vp);
1314
1315                         (void)vnode_put(vp);
1316                 }
1317                 goto outdrop;
1318
1319         case F_RDADVISE: {
1320                 struct radvisory ra_struct;
1321
1322                 if (fp->f_type != DTYPE_VNODE) {
1323                         error = EBADF;
1324                         goto out;
1325                 }
1326                 vp = (struct vnode *)fp->f_data;
1327                 proc_fdunlock(p);
1328
1329                 if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
1330                         goto outdrop;
1331                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1332                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1333
1334                         (void)vnode_put(vp);
1335                 }
1336                 goto outdrop;
1337                 }
1338
1339         case F_FLUSH_DATA:
1340
1341                 if (fp->f_type != DTYPE_VNODE) {
1342                         error = EBADF;
1343                         goto out;
1344                 }
1345                 vp = (struct vnode *)fp->f_data;
1346                 proc_fdunlock(p);
1347
1348                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1349                         error = cluster_push(vp, 0);
1350
1351                         (void)vnode_put(vp);
1352                 }
1353                 goto outdrop;
1354
1355         case F_LOG2PHYS:
1356         case F_LOG2PHYS_EXT: {
1357                 struct log2phys l2p_struct;    /* structure for allocate command */
1358                 int devBlockSize;
1359
1360                 off_t file_offset = 0;
1361                 size_t a_size = 0;
1362                 size_t run = 0;
1363
1364                 if (uap->cmd == F_LOG2PHYS_EXT) {
1365                         error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1366                         if (error)
1367                                 goto out;
1368                         file_offset = l2p_struct.l2p_devoffset;
1369                 } else {
1370                         file_offset = fp->f_offset;
1371                 }
1372                 if (fp->f_type != DTYPE_VNODE) {
1373                         error = EBADF;
1374                         goto out;
1375                 }
1376                 vp = (struct vnode *)fp->f_data;
1377                 proc_fdunlock(p);
1378                 if ( (error = vnode_getwithref(vp)) ) {
1379                         goto outdrop;
1380                 }
1381                 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1382                 if (error) {
1383                         (void)vnode_put(vp);
1384                         goto outdrop;
1385                 }
1386                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1387                 if (error) {
1388                         (void)vnode_put(vp);
1389                         goto outdrop;
1390                 }
1391                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1392                 if (uap->cmd == F_LOG2PHYS_EXT) {
1393 #if defined(__LP64__)
1394                         a_size = l2p_struct.l2p_contigbytes;
1395 #else
1396                         if ((l2p_struct.l2p_contigbytes > SIZE_MAX) || (l2p_struct.l2p_contigbytes < 0)) {
1397                                 /* size_t is 32-bit on a 32-bit kernel, therefore
1398                                  * assigning l2p_contigbytes to a_size may have
1399                                  * caused integer overflow.  We, therefore, return
1400                                  * an error here instead of calculating incorrect
1401                                  * value.
1402                                  */
1403                                 printf ("fcntl: F_LOG2PHYS_EXT: l2p_contigbytes=%lld will overflow, returning error\n", l2p_struct.l2p_contigbytes);
1404                                 error = EFBIG;
1405                                 goto outdrop;
1406                         } else {
1407                                 a_size = l2p_struct.l2p_contigbytes;
1408                         }
1409 #endif
1410                 } else {
1411                         a_size = devBlockSize;
1412                 }
1413
1414                 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1415
1416                 (void)vnode_put(vp);
1417
1418                 if (!error) {
1419                         l2p_struct.l2p_flags = 0;       /* for now */
1420                         if (uap->cmd == F_LOG2PHYS_EXT) {
1421                                 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1422                         } else {
1423                                 l2p_struct.l2p_contigbytes = 0; /* for now */
1424                         }
1425
1426                         /*
1427                          * The block number being -1 suggests that the file offset is not backed
1428                          * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
1429                          */
1430                         if (bn == -1) {
1431                                 /* Don't multiply it by the block size */
1432                                 l2p_struct.l2p_devoffset = bn;
1433                         }
1434                         else {
1435                                 l2p_struct.l2p_devoffset = bn * devBlockSize;
1436                                 l2p_struct.l2p_devoffset += file_offset - offset;
1437                         }
1438                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1439                 }
1440                 goto outdrop;
1441                 }
1442         case F_GETPATH: {
1443                 char *pathbufp;
1444                 int pathlen;
1445
1446                 if (fp->f_type != DTYPE_VNODE) {
1447                         error = EBADF;
1448                         goto out;
1449                 }
1450                 vp = (struct vnode *)fp->f_data;
1451                 proc_fdunlock(p);
1452
1453                 pathlen = MAXPATHLEN;
1454                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1455                 if (pathbufp == NULL) {
1456                         error = ENOMEM;
1457                         goto outdrop;
1458                 }
1459                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1460                         error = vn_getpath(vp, pathbufp, &pathlen);
1461                         (void)vnode_put(vp);
1462
1463                         if (error == 0)
1464                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
1465                 }
1466                 FREE(pathbufp, M_TEMP);
1467                 goto outdrop;
1468         }
1469
1470         case F_PATHPKG_CHECK: {
                     /*
                      * Copy a NUL-terminated path (at most MAXPATHLEN bytes) in
                      * from user space and hand it, together with the
                      * descriptor's vnode, to vn_path_package_check(), which
                      * reports its result through retval.  Only vnode-backed
                      * descriptors are accepted (EBADF otherwise).
                      */
1471                 char *pathbufp;
1472                 size_t pathlen;
1473
1474                 if (fp->f_type != DTYPE_VNODE) {
1475                         error = EBADF;
1476                         goto out;
1477                 }
1478                 vp = (struct vnode *)fp->f_data;
1479                 proc_fdunlock(p);
1480
1481                 pathlen = MAXPATHLEN;
1482                 pathbufp = kalloc(MAXPATHLEN);
                     /*
                      * Bail out before copyinstr() can write through a NULL
                      * buffer, mirroring the allocation check in F_GETPATH.
                      */
                     if (pathbufp == NULL) {
                             error = ENOMEM;
                             goto outdrop;
                     }
1483
1484                 if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) {
1485                         if ( (error = vnode_getwithref(vp)) == 0 ) {
1486                                 AUDIT_ARG(text, pathbufp);
1487                                 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1488
1489                                 (void)vnode_put(vp);
1490                         }
1491                 }
1492                 kfree(pathbufp, MAXPATHLEN);
1493                 goto outdrop;
1494         }
1495
1496         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
1497         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZECACHE
1498         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
1499         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
1500                 if (fp->f_type != DTYPE_VNODE) {
1501                         error = EBADF;
1502                         goto out;
1503                 }
1504                 vp = (struct vnode *)fp->f_data;
1505                 proc_fdunlock(p);
1506
1507                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1508                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1509
1510                         (void)vnode_put(vp);
1511                 }
1512                 break;
1513         }
1514
1515         /*
1516          * SPI (private) for opening a file starting from a dir fd
1517          */
1518         case F_OPENFROM: {
1519                 struct user_fopenfrom fopen;
1520                 struct vnode_attr va;
1521                 struct nameidata nd;
1522                 int cmode;
1523
1524                 /* Check if this isn't a valid file descriptor */
1525                 if ((fp->f_type != DTYPE_VNODE) ||
1526                     (fp->f_flag & FREAD) == 0) {
1527                         error = EBADF;
1528                         goto out;
1529                 }
1530                 vp = (struct vnode *)fp->f_data;
1531                 proc_fdunlock(p);
1532
1533                 if (vnode_getwithref(vp)) {
1534                         error = ENOENT;
1535                         goto outdrop;
1536                 }
1537
1538                 /* Only valid for directories */
1539                 if (vp->v_type != VDIR) {
1540                         vnode_put(vp);
1541                         error = ENOTDIR;
1542                         goto outdrop;
1543                 }
1544
1545                 /* Get flags, mode and pathname arguments. */
1546                 if (IS_64BIT_PROCESS(p)) {
1547                         error = copyin(argp, &fopen, sizeof(fopen));
1548                 } else {
1549                         struct user32_fopenfrom fopen32;
1550
1551                         error = copyin(argp, &fopen32, sizeof(fopen32));
1552                         fopen.o_flags = fopen32.o_flags;
1553                         fopen.o_mode = fopen32.o_mode;
1554                         fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1555                 }
1556                 if (error) {
1557                         vnode_put(vp);
1558                         goto outdrop;
1559                 }
1560                 AUDIT_ARG(fflags, fopen.o_flags);
1561                 AUDIT_ARG(mode, fopen.o_mode);
1562                 VATTR_INIT(&va);
1563                 /* Mask off all but regular access permissions */
1564                 cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1565                 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1566
1567                 /* Start the lookup relative to the file descriptor's vnode. */
1568                 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1569                        fopen.o_pathname, &context);
1570                 nd.ni_dvp = vp;
1571
1572                 error = open1(&context, &nd, fopen.o_flags, &va,
1573                               fileproc_alloc_init, NULL, retval);
1574
1575                 vnode_put(vp);
1576                 break;
1577         }
1578         /*
1579          * SPI (private) for unlinking a file starting from a dir fd
1580          */
1581         case F_UNLINKFROM: {
1582                 struct nameidata nd;
1583                 user_addr_t pathname;
1584
1585                 /* Check if this isn't a valid file descriptor */
1586                 if ((fp->f_type != DTYPE_VNODE) ||
1587                     (fp->f_flag & FREAD) == 0) {
1588                         error = EBADF;
1589                         goto out;
1590                 }
1591                 vp = (struct vnode *)fp->f_data;
1592                 proc_fdunlock(p);
1593
1594                 if (vnode_getwithref(vp)) {
1595                         error = ENOENT;
1596                         goto outdrop;
1597                 }
1598
1599                 /* Only valid for directories */
1600                 if (vp->v_type != VDIR) {
1601                         vnode_put(vp);
1602                         error = ENOTDIR;
1603                         goto outdrop;
1604                 }
1605
1606                 /* Get flags, mode and pathname arguments. */
1607                 if (IS_64BIT_PROCESS(p)) {
1608                         pathname = (user_addr_t)argp;
1609                 } else {
1610                         pathname = CAST_USER_ADDR_T(argp);
1611                 }
1612
1613                 /* Start the lookup relative to the file descriptor's vnode. */
1614                 NDINIT(&nd, DELETE, OP_UNLINK, USEDVP | AUDITVNPATH1, UIO_USERSPACE,
1615                        pathname, &context);
1616                 nd.ni_dvp = vp;
1617
1618                 error = unlink1(&context, &nd, 0);
1619
1620                 vnode_put(vp);
1621                 break;
1622
1623         }
1624
1625         case F_ADDSIGS:
1626         case F_ADDFILESIGS:
1627         {
1628                 struct user_fsignatures fs;
1629                 kern_return_t kr;
1630                 vm_offset_t kernel_blob_addr;
1631                 vm_size_t kernel_blob_size;
1632
1633                 if (fp->f_type != DTYPE_VNODE) {
1634                         error = EBADF;
1635                         goto out;
1636                 }
1637                 vp = (struct vnode *)fp->f_data;
1638                 proc_fdunlock(p);
1639                 error = vnode_getwithref(vp);
1640                 if (error)
1641                         goto outdrop;
1642
1643                 if (IS_64BIT_PROCESS(p)) {
1644                         error = copyin(argp, &fs, sizeof (fs));
1645                 } else {
1646                         struct user32_fsignatures fs32;
1647
1648                         error = copyin(argp, &fs32, sizeof (fs32));
1649                         fs.fs_file_start = fs32.fs_file_start;
1650                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1651                         fs.fs_blob_size = fs32.fs_blob_size;
1652                 }
1653
1654                 if (error) {
1655                         vnode_put(vp);
1656                         goto outdrop;
1657                 }
1658
1659                 if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start))
1660                 {
1661                         vnode_put(vp);
1662                         goto outdrop;
1663                 }
1664 /*
1665  * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
1666  * our use cases for the immediate future, but note that at the time of this commit, some
1667  * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
1668  *
1669  * We should consider how we can manage this more effectively; the above means that some
1670  * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
1671  * threshold considered ridiculous at the time of this change.
1672  */
1673 #define CS_MAX_BLOB_SIZE (10ULL * 1024ULL * 1024ULL)
1674                 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1675                         error = E2BIG;
1676                         vnode_put(vp);
1677                         goto outdrop;
1678                 }
1679
1680                 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1681                 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1682                 if (kr != KERN_SUCCESS) {
1683                         error = ENOMEM;
1684                         vnode_put(vp);
1685                         goto outdrop;
1686                 }
1687
1688                 if(uap->cmd == F_ADDSIGS) {
1689                         error = copyin(fs.fs_blob_start,
1690                                        (void *) kernel_blob_addr,
1691                                        kernel_blob_size);
1692                 } else /* F_ADDFILESIGS */ {
1693                         error = vn_rdwr(UIO_READ,
1694                                         vp,
1695                                         (caddr_t) kernel_blob_addr,
1696                                         kernel_blob_size,
1697                                          fs.fs_file_start + fs.fs_blob_start,
1698                                         UIO_SYSSPACE,
1699                                         0,
1700                                         kauth_cred_get(),
1701                                         0,
1702                                         p);
1703                 }
1704
1705                 if (error) {
1706                         ubc_cs_blob_deallocate(kernel_blob_addr,
1707                                                kernel_blob_size);
1708                         vnode_put(vp);
1709                         goto outdrop;
1710                 }
1711
1712                 error = ubc_cs_blob_add(
1713                         vp,
1714                         CPU_TYPE_ANY,   /* not for a specific architecture */
1715                         fs.fs_file_start,
1716                         kernel_blob_addr,
1717                         kernel_blob_size);
1718                 if (error) {
1719                         ubc_cs_blob_deallocate(kernel_blob_addr,
1720                                                kernel_blob_size);
1721                 } else {
1722                         /* ubc_cs_blob_add() has consumed "kernel_blob_addr" */
1723 #if CHECK_CS_VALIDATION_BITMAP
1724                         ubc_cs_validation_bitmap_allocate( vp );
1725 #endif
1726                 }
1727
1728                 (void) vnode_put(vp);
1729                 break;
1730         }
1731         case F_FINDSIGS: {
1732 #ifdef SECURE_KERNEL
1733                 /* fd lock not dropped yet: exit via "out" like the EBADF path does */
1734                 error = ENOTSUP;
1735                 goto out;
1736 #else /* !SECURE_KERNEL */
1737                 off_t offsetMacho;
1738
1739                 if (fp->f_type != DTYPE_VNODE) {
1740                         error = EBADF;
1741                         goto out;
1742                 }
1743                 vp = (struct vnode *)fp->f_data;
1744                 proc_fdunlock(p);
1745                 error = vnode_getwithref(vp);
1746                 if (error)
1747                         goto outdrop;
1748
1749                 /* argp is the user address of the caller's off_t offset */
1750                 error = copyin(argp, &offsetMacho, sizeof(offsetMacho));
1751                 if (error) {
1752                         (void)vnode_put(vp);
1753                         goto outdrop;
1754                 }
1755 #if CONFIG_MACF
1756                 error = mac_vnode_find_sigs(p, vp, offsetMacho);
1757 #else
1758                 error = EPERM;
1759 #endif
1760                 /* Release the iocount on success as well as on failure. */
1761                 (void)vnode_put(vp);
1762                 break;
1763 #endif /* SECURE_KERNEL */
1764         }
1765 #if CONFIG_PROTECT
1766         case F_GETPROTECTIONCLASS: {
1767                 int class = 0;
1768
1769                 if (fp->f_type != DTYPE_VNODE) {
1770                         error = EBADF;
1771                         goto out;
1772                 }
1773                 vp = (struct vnode *)fp->f_data;
1774
1775                 proc_fdunlock(p);
1776
1777                 if (vnode_getwithref(vp)) {
1778                         error = ENOENT;
1779                         goto outdrop;
1780                 }
1781
1782                 error = cp_vnode_getclass (vp, &class);
1783                 if (error == 0) {
1784                         *retval = class;
1785                 }
1786
1787                 vnode_put(vp);
1788                 break;
1789         }
1790
1791         case F_SETPROTECTIONCLASS: {
1792                 /* tmp must be a valid PROTECTION_CLASS_* */
1793                 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
1794
1795                 if (fp->f_type != DTYPE_VNODE) {
1796                         error = EBADF;
1797                         goto out;
1798                 }
1799                 vp = (struct vnode *)fp->f_data;
1800
1801                 proc_fdunlock(p);
1802
1803                 if (vnode_getwithref(vp)) {
1804                         error = ENOENT;
1805                         goto outdrop;
1806                 }
1807
1808                 /* Only go forward if you have write access */
1809                 vfs_context_t ctx = vfs_context_current();
1810                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1811                         vnode_put(vp);
1812                         error = EBADF;
1813                         goto outdrop;
1814                 }
1815                 error = cp_vnode_setclass (vp, tmp);
1816                 vnode_put(vp);
1817                 break;
1818         }
1819
1820         case F_TRANSCODEKEY: {
1821                 /* Copies the key bytes filled in by cp_vnode_transcode() out to argp. */
1822                 char *backup_keyp = NULL;
1823                 unsigned backup_key_len = CP_MAX_WRAPPEDKEYSIZE;
1824
1825                 if (fp->f_type != DTYPE_VNODE) {
1826                         error = EBADF;
1827                         goto out;
1828                 }
1829
1830                 vp = (struct vnode *)fp->f_data;
1831                 proc_fdunlock(p);
1832
1833                 if (vnode_getwithref(vp)) {
1834                         error = ENOENT;
1835                         goto outdrop;
1836                 }
1837
1838                 MALLOC(backup_keyp, char *, backup_key_len, M_TEMP, M_WAITOK);
1839                 if (backup_keyp == NULL) {
1840                         vnode_put(vp);  /* do not leak the iocount taken above */
1841                         error = ENOMEM;
1842                         goto outdrop;
1843                 }
1844
1845                 error = cp_vnode_transcode (vp, backup_keyp, &backup_key_len);
1846                 vnode_put(vp);
1847
1848                 if (error == 0) {
1849                         error = copyout((caddr_t)backup_keyp, argp, backup_key_len);
1850                         *retval = backup_key_len;
1851                 }
1852
1853                 FREE(backup_keyp, M_TEMP);
1854                 break;
1855         }
1856
1857         case F_GETPROTECTIONLEVEL:  {
1858                 uint32_t cp_version = 0;
1859
1860                 if (fp->f_type != DTYPE_VNODE) {
1861                         error = EBADF;
1862                         goto out;
1863                 }
1864
1865                 vp = (struct vnode*) fp->f_data;
1866                 proc_fdunlock (p);
1867
1868                 if (vnode_getwithref(vp)) {
1869                         error = ENOENT;
1870                         goto outdrop;
1871                 }
1872
1873                 /*
1874                  * if cp_get_root_major_vers fails, error will be set to proper errno
1875                  * and cp_version will still be 0.
1876                  */
1877
1878                 error = cp_get_root_major_vers (vp, &cp_version);
1879                 *retval = cp_version;
1880
1881                 vnode_put (vp);
1882                 break;
1883         }
1884
1885         case F_GETDEFAULTPROTLEVEL:  {
1886                 uint32_t cp_default = 0;
1887
1888                 if (fp->f_type != DTYPE_VNODE) {
1889                         error = EBADF;
1890                         goto out;
1891                 }
1892
1893                 vp = (struct vnode*) fp->f_data;
1894                 proc_fdunlock (p);
1895
1896                 if (vnode_getwithref(vp)) {
1897                         error = ENOENT;
1898                         goto outdrop;
1899                 }
1900
1901                 /*
1902                  * if cp_get_default_level fails, error will be set to proper errno
1903                  * and cp_default will still be 0.
1904                  */
1905
1906                 error = cp_get_default_level(vp, &cp_default);
1907                 *retval = cp_default;
1908
1909                 vnode_put (vp);
1910                 break;
1911         }
1912
1913
1914 #endif /* CONFIG_PROTECT */
1915
1916         case F_MOVEDATAEXTENTS: {
1917                 struct fileproc *fp2 = NULL;
1918                 struct vnode *src_vp = NULLVP;
1919                 struct vnode *dst_vp = NULLVP;
1920                 /* We need to grab the 2nd FD out of the arguments before moving on. */
1921                 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
1922
1923                 if (fp->f_type != DTYPE_VNODE) {
1924                         error = EBADF;
1925                         goto out;
1926                 }
1927
1928                 /* For now, special case HFS+ only, since this is SPI. */
1929                 src_vp = (struct vnode *)fp->f_data;
1930                 if (src_vp->v_tag != VT_HFS) {
1931                         error = EINVAL;
1932                         goto out;
1933                 }
1934
1935                 /*
1936                  * Get the references before we start acquiring iocounts on the vnodes,
1937                  * while we still hold the proc fd lock
1938                  */
1939                 if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
1940                         error = EBADF;
1941                         goto out;
1942                 }
1943                 if (fp2->f_type != DTYPE_VNODE) {
1944                         fp_drop(p, fd2, fp2, 1);
1945                         error = EBADF;
1946                         goto out;
1947                 }
1948                 dst_vp = (struct vnode *)fp2->f_data;
1949                 if (dst_vp->v_tag != VT_HFS) {
1950                         fp_drop(p, fd2, fp2, 1);
1951                         error = EINVAL;
1952                         goto out;
1953                 }
1954
1955 #if CONFIG_MACF
1956                 /* Re-do MAC checks against the new FD, pass in a fake argument */
1957                 error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
1958                 if (error) {
1959                         fp_drop(p, fd2, fp2, 1);
1960                         goto out;
1961                 }
1962 #endif
1963                 /* Audit the 2nd FD */
1964                 AUDIT_ARG(fd, fd2);
1965
1966                 proc_fdunlock(p);
1967
1968                 if (vnode_getwithref(src_vp)) {
1969                         fp_drop(p, fd2, fp2, 0);
1970                         error = ENOENT;
1971                         goto outdrop;
1972                 }
1973                 if (vnode_getwithref(dst_vp)) {
1974                         vnode_put (src_vp);
1975                         fp_drop(p, fd2, fp2, 0);
1976                         error = ENOENT;
1977                         goto outdrop;
1978                 }
1979
1980                 /*
1981                  * Basic asserts; validate they are not the same and that
1982                  * both live on the same filesystem.
1983                  */
1984                 if (dst_vp == src_vp) {
1985                         vnode_put (src_vp);
1986                         vnode_put (dst_vp);
1987                         fp_drop (p, fd2, fp2, 0);
1988                         error = EINVAL;
1989                         goto outdrop;
1990                 }
1991
1992                 if (dst_vp->v_mount != src_vp->v_mount) {
1993                         vnode_put (src_vp);
1994                         vnode_put (dst_vp);
1995                         fp_drop (p, fd2, fp2, 0);
1996                         error = EXDEV;
1997                         goto outdrop;
1998                 }
1999
2000                 /* Now we have a legit pair of FDs.  Go to work */
2001
2002                 /* Now check for write access to the target files */
2003                 if(vnode_authorize(src_vp, NULLVP,
2004                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2005                         vnode_put(src_vp);
2006                         vnode_put(dst_vp);
2007                         fp_drop(p, fd2, fp2, 0);
2008                         error = EBADF;
2009                         goto outdrop;
2010                 }
2011
2012                 if(vnode_authorize(dst_vp, NULLVP,
2013                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2014                         vnode_put(src_vp);
2015                         vnode_put(dst_vp);
2016                         fp_drop(p, fd2, fp2, 0);
2017                         error = EBADF;
2018                         goto outdrop;
2019                 }
2020
2021                 /* Verify that both vps point to files and not directories */
2022                 if ( !vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2023                         error = EINVAL;
2024                         vnode_put (src_vp);
2025                         vnode_put (dst_vp);
2026                         fp_drop (p, fd2, fp2, 0);
2027                         goto outdrop;
2028                 }
2029
2030                 /*
2031                  * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2032                  * We'll pass in our special bit indicating that the new behavior is expected
2033                  */
2034
2035                 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2036
2037                 vnode_put (src_vp);
2038                 vnode_put (dst_vp);
2039                 fp_drop(p, fd2, fp2, 0);
2040                 break;
2041         }
2042
2043         /*
2044          * SPI for making a file compressed.
2045          */
2046         case F_MAKECOMPRESSED: {
2047                 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2048
2049                 if (fp->f_type != DTYPE_VNODE) {
2050                         error = EBADF;
2051                         goto out;
2052                 }
2053
2054                 vp = (struct vnode*) fp->f_data;
2055                 proc_fdunlock (p);
2056
2057                 /* get the vnode */
2058                 if (vnode_getwithref(vp)) {
2059                         error = ENOENT;
2060                         goto outdrop;
2061                 }
2062
2063                 /* Is it a file? */
2064                 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2065                         vnode_put(vp);
2066                         error = EBADF;
2067                         goto outdrop;
2068                 }
2069
2070                 /* invoke ioctl to pass off to FS */
2071                 /* Only go forward if you have write access */
2072                 vfs_context_t ctx = vfs_context_current();
2073                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2074                         vnode_put(vp);
2075                         error = EBADF;
2076                         goto outdrop;
2077                 }
2078
2079                 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2080
2081                 vnode_put (vp);
2082                 break;
2083         }
2084
2085         /*
2086          * SPI (private) for indicating to a filesystem that subsequent writes to
2087          * the open FD will be written to the Fastflow.
2088          */
2089         case F_SET_GREEDY_MODE:
2090                 /* intentionally drop through to the same handler as F_SETSTATICCONTENT.
2091                  * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2092                  */
2093
2094         /*
2095          * SPI (private) for indicating to a filesystem that subsequent writes to
2096          * the open FD will represent static content.
2097          */
2098         case F_SETSTATICCONTENT: {
2099                 caddr_t ioctl_arg = NULL;
2100
2101                 if (uap->arg) {
2102                         ioctl_arg = (caddr_t) 1;
2103                 }
2104
2105                 if (fp->f_type != DTYPE_VNODE) {
2106                         error = EBADF;
2107                         goto out;
2108                 }
2109                 vp = (struct vnode *)fp->f_data;
2110                 proc_fdunlock(p);
2111
2112                 error = vnode_getwithref(vp);
2113                 if (error) {
2114                         error = ENOENT;
2115                         goto outdrop;
2116                 }
2117
2118                 /* Only go forward if you have write access */
2119                 vfs_context_t ctx = vfs_context_current();
2120                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2121                         vnode_put(vp);
2122                         error = EBADF;
2123                         goto outdrop;
2124                 }
2125
2126                 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2127                 (void)vnode_put(vp);
2128
2129                 break;
2130         }
2131
2132         /*
2133          * SPI (private) for indicating to the lower level storage driver that the
2134          * subsequent writes should be of a particular IO type (burst, greedy, static),
2135          * or other flavors that may be necessary.
2136          */
2137         case F_SETIOTYPE: {
2138                 caddr_t param_ptr;
2139                 uint32_t param;
2140
2141                 if (uap->arg) {
2142                         /* extract 32 bits of flags from userland */
2143                         param_ptr = (caddr_t) uap->arg;
2144                         param = (uint32_t) param_ptr;
2145                 }
2146                 else {
2147                         /* If no argument is specified, error out */
2148                         error = EINVAL;
2149                         goto out;
2150                 }
2151
2152                 /*
2153                  * Validate the different types of flags that can be specified:
2154                  * all of them are mutually exclusive for now.
2155                  */
2156                 switch (param) {
2157                         case F_IOTYPE_ISOCHRONOUS:
2158                                 break;
2159
2160                         default:
2161                                 error = EINVAL;
2162                                 goto out;
2163                 }
2164
2165
2166                 if (fp->f_type != DTYPE_VNODE) {
2167                         error = EBADF;
2168                         goto out;
2169                 }
2170                 vp = (struct vnode *)fp->f_data;
2171                 proc_fdunlock(p);
2172
2173                 error = vnode_getwithref(vp);
2174                 if (error) {
2175                         error = ENOENT;
2176                         goto outdrop;
2177                 }
2178
2179                 /* Only go forward if you have write access */
2180                 vfs_context_t ctx = vfs_context_current();
2181                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2182                         vnode_put(vp);
2183                         error = EBADF;
2184                         goto outdrop;
2185                 }
2186
2187                 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2188                 (void)vnode_put(vp);
2189
2190                 break;
2191         }
2192
2193
2194         /*
2195          * Extract the CodeDirectory of the vnode associated with
2196          * the file descriptor and copy it back to user space
2197          */
2198         case F_GETCODEDIR: {
2199                 struct user_fcodeblobs args;
2200
2201                 if (fp->f_type != DTYPE_VNODE) {
2202                         error = EBADF;
2203                         goto out;
2204                 }
2205
2206                 vp = (struct vnode *)fp->f_data;
2207                 proc_fdunlock(p);
2208
2209                 if ((fp->f_flag & FREAD) == 0) {
2210                         error = EBADF;
2211                         goto outdrop;
2212                 }
2213
2214                 if (IS_64BIT_PROCESS(p)) {
2215                         struct user64_fcodeblobs args64;
2216
2217                         error = copyin(argp, &args64, sizeof(args64));
2218                         if (error)
2219                                 goto outdrop;
2220
2221                         args.f_cd_hash = args64.f_cd_hash;
2222                         args.f_hash_size = args64.f_hash_size;
2223                         args.f_cd_buffer = args64.f_cd_buffer;
2224                         args.f_cd_size = args64.f_cd_size;
2225                         args.f_out_size = args64.f_out_size;
2226                         args.f_arch = args64.f_arch;
2227                 } else {
2228                         struct user32_fcodeblobs args32;
2229
2230                         error = copyin(argp, &args32, sizeof(args32));
2231                         if (error)
2232                                 goto outdrop;
2233
2234                         args.f_cd_hash = CAST_USER_ADDR_T(args32.f_cd_hash);
2235                         args.f_hash_size = args32.f_hash_size;
2236                         args.f_cd_buffer = CAST_USER_ADDR_T(args32.f_cd_buffer);
2237                         args.f_cd_size = args32.f_cd_size;
2238                         args.f_out_size = CAST_USER_ADDR_T(args32.f_out_size);
2239                         args.f_arch = args32.f_arch;
2240                 }
2241
2242                 if (vp->v_ubcinfo == NULL) {
2243                         error = EINVAL;
2244                         goto outdrop;
2245                 }
2246
2247                 struct cs_blob *t_blob = vp->v_ubcinfo->cs_blobs;
2248
2249                 /*
2250                  * This call fails if there is no cs_blob corresponding to the
2251                  * vnode, or if there are multiple cs_blobs present, and the caller
2252                  * did not specify which cpu_type they want the cs_blob for
2253                  */
2254                 if (t_blob == NULL) {
2255                         error = ENOENT; /* there is no codesigning blob for this process */
2256                         goto outdrop;
2257                 } else if (args.f_arch == 0 && t_blob->csb_next != NULL) {
2258                         error = ENOENT; /* too many architectures and none specified */
2259                         goto outdrop;
2260                 }
2261
2262                 /* If the user specified an architecture, find the right blob */
2263                 if (args.f_arch != 0) {
2264                         while (t_blob) {
2265                                 if (t_blob->csb_cpu_type == args.f_arch)
2266                                         break;
2267                                 t_blob = t_blob->csb_next;
2268                         }
2269                         /* The cpu_type the user requested could not be found */
2270                         if (t_blob == NULL) {
2271                                 error = ENOENT;
2272                                 goto outdrop;
2273                         }
2274                 }
2275
2276                 const CS_SuperBlob *super_blob = (void *)t_blob->csb_mem_kaddr;
2277                 const CS_CodeDirectory *cd = findCodeDirectory(super_blob,
2278                                                          (char *) super_blob,
2279                                                          (char *) super_blob + t_blob->csb_mem_size);
2280                 if (cd == NULL) {
2281                         error = ENOENT;
2282                         goto outdrop;
2283                 }
2284
2285                 uint64_t buffer_size = ntohl(cd->length);
2286
2287                 if (buffer_size > UINT_MAX) {
2288                         error = ERANGE;
2289                         goto outdrop;
2290                 }
2291
2292                 error = copyout(&buffer_size, args.f_out_size, sizeof(unsigned int));
2293                 if (error)
2294                         goto outdrop;
2295
2296                 if (sizeof(t_blob->csb_sha1) > args.f_hash_size ||
2297                                         buffer_size > args.f_cd_size) {
2298                         error = ERANGE;
2299                         goto outdrop;
2300                 }
2301
2302                 error = copyout(t_blob->csb_sha1, args.f_cd_hash, sizeof(t_blob->csb_sha1));
2303                 if (error)
2304                         goto outdrop;
2305                 error = copyout(cd, args.f_cd_buffer, buffer_size);
2306                 if (error)
2307                         goto outdrop;
2308
2309                 break;
2310         }
2311
2312         /*
2313          * Set the vnode pointed to by 'fd'
2314          * and tag it as the (potentially future) backing store
2315          * for another filesystem
2316          */
2317         case F_SETBACKINGSTORE: {
2318                 if (fp->f_type != DTYPE_VNODE) {
2319                         error = EBADF;
2320                         goto out;
2321                 }
2322
2323                 vp = (struct vnode *)fp->f_data;
2324
2325                 if (vp->v_tag != VT_HFS) {
2326                         error = EINVAL;
2327                         goto out;
2328                 }
2329                 proc_fdunlock(p);
2330
2331                 if (vnode_getwithref(vp)) {
2332                         error = ENOENT;
2333                         goto outdrop;
2334                 }
2335
2336                 /* only proceed if you have write access */
2337                 vfs_context_t ctx = vfs_context_current();
2338                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2339                         vnode_put(vp);
2340                         error = EBADF;
2341                         goto outdrop;
2342                 }
2343
2344
2345                 /* If arg != 0, set, otherwise unset */
2346                 if (uap->arg) {
2347                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)1, 0, &context);
2348                 }
2349                 else {
2350                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, 0, &context);
2351                 }
2352
2353                 vnode_put(vp);
2354                 break;
2355         }
2356
2357         /*
2358          * like F_GETPATH, but special semantics for
2359          * the mobile time machine handler.
2360          */
2361         case F_GETPATH_MTMINFO: {
2362                 char *pathbufp;
2363                 int pathlen;
2364
2365                 if (fp->f_type != DTYPE_VNODE) {
2366                         error = EBADF;
2367                         goto out;
2368                 }
2369                 vp = (struct vnode *)fp->f_data;
2370                 proc_fdunlock(p);
2371
2372                 pathlen = MAXPATHLEN;
2373                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
2374                 if (pathbufp == NULL) {
2375                         error = ENOMEM;
2376                         goto outdrop;
2377                 }
2378                 if ( (error = vnode_getwithref(vp)) == 0 ) {
2379                         int backingstore = 0;
2380
2381                         /* Check for error from vn_getpath before moving on */
2382                         if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
2383                                 if (vp->v_tag == VT_HFS) {
2384                                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2385                                 }
2386                                 (void)vnode_put(vp);
2387
2388                                 if (error == 0) {
2389                                         error = copyout((caddr_t)pathbufp, argp, pathlen);
2390                                 }
2391                                 if (error == 0) {
2392                                         /*
2393                                          * If the copyout was successful, now check to ensure
2394                                          * that this vnode is not a BACKINGSTORE vnode.  mtmd
2395                                          * wants the path regardless.
2396                                          */
2397                                         if (backingstore) {
2398                                                 error = EBUSY;
2399                                         }
2400                                 }
2401                         } else
2402                                 (void)vnode_put(vp);
2403                 }
2404                 FREE(pathbufp, M_TEMP);
2405                 goto outdrop;
2406         }
2407
2408         default:
2409                 /*
2410                  * This is an fcntl() that we do not recognize at this level;
2411                  * if this is a vnode, we send it down into the VNOP_IOCTL
2412                  * for this vnode; this can include special devices, and will
2413                  * effectively overload fcntl() to send ioctl()'s.
2414                  */
2415                 if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){
2416             error = EINVAL;
2417                         goto out;
2418                 }
2419
2420                 /* Catch any now-invalid fcntl() selectors */
2421                 switch (uap->cmd) {
2422                         case F_MARKDEPENDENCY:
2423                                 error = EINVAL;
2424                                 goto out;
2425                         default:
2426                                 break;
2427                 }
2428
2429                 if (fp->f_type != DTYPE_VNODE) {
2430                         error = EBADF;
2431                         goto out;
2432                 }
2433                 vp = (struct vnode *)fp->f_data;
2434                 proc_fdunlock(p);
2435
2436                 if ( (error = vnode_getwithref(vp)) == 0 ) {
2437 #define STK_PARAMS 128
2438                         char stkbuf[STK_PARAMS];
2439                         unsigned int size;
2440                         caddr_t data, memp;
2441                         /*
2442                          * For this to work properly, we have to copy in the
2443                          * ioctl() cmd argument if there is one; we must also
2444                          * check that a command parameter, if present, does
2445                          * not exceed the maximum command length dictated by
2446                          * the number of bits we have available in the command
2447                          * to represent a structure length.  Finally, we have
2448                          * to copy the results back out, if it is that type of
2449                          * ioctl().
2450                          */
2451                         size = IOCPARM_LEN(uap->cmd);
2452                         if (size > IOCPARM_MAX) {
2453                                 (void)vnode_put(vp);
2454                                 error = EINVAL;
2455                                 break;
2456                         }
2457
2458                         memp = NULL;
2459                         if (size > sizeof (stkbuf)) {
2460                                 if ((memp = (caddr_t)kalloc(size)) == 0) {
2461                                         (void)vnode_put(vp);
2462                                         error = ENOMEM;
2463                                         goto outdrop;
2464                                 }
2465                                 data = memp;
2466                         } else {
2467                                 data = &stkbuf[0];
2468                         }
2469
2470                         if (uap->cmd & IOC_IN) {
2471                                 if (size) {
2472                                         /* structure */
2473                                         error = copyin(argp, data, size);
2474                                         if (error) {
2475                                                 (void)vnode_put(vp);
2476                                                 if (memp)
2477                                                         kfree(memp, size);
2478                                                 goto outdrop;
2479                                         }
2480
2481                                         /* Bzero the section beyond that which was needed */
2482                                         if (size <= sizeof(stkbuf)) {
2483                                                 bzero ( (((uint8_t*)data) + size), (sizeof(stkbuf) - size));
2484                                         }
2485                                 } else {
2486                                         /* int */
2487                                         if (is64bit) {
2488                                                 *(user_addr_t *)data = argp;
2489                                         } else {
2490                                                 *(uint32_t *)data = (uint32_t)argp;
2491                                         }
2492                                 };
2493                         } else if ((uap->cmd & IOC_OUT) && size) {
2494                                 /*
2495                                  * Zero the buffer so the user always
2496                                  * gets back something deterministic.
2497                                  */
2498                                 bzero(data, size);
2499                         } else if (uap->cmd & IOC_VOID) {
2500                                 if (is64bit) {
2501                                     *(user_addr_t *)data = argp;
2502                                 } else {
2503                                     *(uint32_t *)data = (uint32_t)argp;
2504                                 }
2505                         }
2506
2507                         error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2508
2509                         (void)vnode_put(vp);
2510
2511                         /* Copy any output data to user */
2512                         if (error == 0 && (uap->cmd & IOC_OUT) && size)
2513                                 error = copyout(data, argp, size);
2514                         if (memp)
2515                                 kfree(memp, size);
2516                 }
2517                 break;
2518         }
2519
2520 outdrop:
2521         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2522         fp_drop(p, fd, fp, 0);
2523         return(error);
2524 out:
2525         fp_drop(p, fd, fp, 1);
2526         proc_fdunlock(p);
2527         return(error);
2528 }
2529
2530
2531 /*
2532  * finishdup
2533  *
2534  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2535  *
2536  * Parameters:  p                               Process performing the dup
2537  *              old                             The fd to dup
2538  *              new                             The fd to dup it to
2539  *              fd_flags                        Flags to augment the new fd
2540  *              retval                          Pointer to the call return area
2541  *
2542  * Returns:     0                               Success
2543  *              EBADF
2544  *              ENOMEM
2545  *
2546  * Implicit returns:
2547  *              *retval (modified)              The new descriptor
2548  *
2549  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
2550  *              the caller
2551  *
2552  * Notes:       This function may drop and reacquire this lock; it is unsafe
2553  *              for a caller to assume that other state protected by the lock
2554  *              has not been subsequently changed out from under it.
2555  */
2556 int
2557 finishdup(proc_t p,
2558     struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2559 {
2560         struct fileproc *nfp;
2561         struct fileproc *ofp;
2562 #if CONFIG_MACF
2563         int error;
2564 #endif
2565
2566 #if DIAGNOSTIC
2567         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2568 #endif
2569         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2570             (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2571                 fdrelse(p, new);
2572                 return (EBADF);
2573         }
2574         fg_ref(ofp);
2575
2576 #if CONFIG_MACF
2577         error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2578         if (error) {
2579                 fg_drop(ofp);
2580                 fdrelse(p, new);
2581                 return (error);
2582         }
2583 #endif
2584
2585         proc_fdunlock(p);
2586
2587         nfp = fileproc_alloc_init(NULL);
2588
2589         proc_fdlock(p);
2590
2591         if (nfp == NULL) {
2592                 fg_drop(ofp);
2593                 fdrelse(p, new);
2594                 return (ENOMEM);
2595         }
2596
2597         nfp->f_fglob = ofp->f_fglob;
2598
2599 #if DIAGNOSTIC
2600         if (fdp->fd_ofiles[new] != 0)
2601                 panic("finishdup: overwriting fd_ofiles with new %d", new);
2602         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
2603                 panic("finishdup: unreserved fileflags with new %d", new);
2604 #endif
2605
2606         if (new > fdp->fd_lastfile)
2607                 fdp->fd_lastfile = new;
2608         *fdflags(p, new) |= fd_flags;
2609         procfdtbl_releasefd(p, new, nfp);
2610         *retval = new;
2611         return (0);
2612 }
2613
2614
2615 /*
2616  * close
2617  *
2618  * Description: The implementation of the close(2) system call
2619  *
2620  * Parameters:  p                       Process in whose per process file table
2621  *                                      the close is to occur
2622  *              uap->fd                 fd to be closed
2623  *              retval                  <unused>
2624  *
2625  * Returns:     0                       Success
2626  *      fp_lookup:EBADF                 Bad file descriptor
2627  *      fp_guard_exception:???          Guarded file descriptor
2628  *      close_internal:EBADF
2629  *      close_internal:???              Anything returnable by a per-fileops
2630  *                                      close function
2631  */
2632 int
2633 close(proc_t p, struct close_args *uap, int32_t *retval)
2634 {
2635         __pthread_testcancel(1);
2636         return(close_nocancel(p, (struct close_nocancel_args *)uap, retval));
2637 }
2638
2639
2640 int
2641 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2642 {
2643         struct fileproc *fp;
2644         int fd = uap->fd;
2645         int error;
2646
2647         AUDIT_SYSCLOSE(p, fd);
2648
2649         proc_fdlock(p);
2650
2651         if ( (error = fp_lookup(p,fd,&fp, 1)) ) {
2652                 proc_fdunlock(p);
2653                 return(error);
2654         }
2655
2656         if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
2657                 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
2658                 (void) fp_drop(p, fd, fp, 1);
2659                 proc_fdunlock(p);
2660                 return (error);
2661         }
2662
2663         error = close_internal_locked(p, fd, fp, 0);
2664
2665         proc_fdunlock(p);
2666
2667         return (error);
2668 }
2669
2670
/*
 * close_internal_locked
 *
 * Close a file descriptor.
 *
 * Parameters:  p                       Process in whose per process file table
 *                                      the close is to occur
 *              fd                      fd to be closed
 *              fp                      fileproc associated with the fd
 *              flags                   Only FD_DUP2RESV is examined: when set
 *                                      the fd slot is re-reserved with
 *                                      procfdtbl_reservefd() instead of being
 *                                      released with _fdrelse()
 *
 * Returns:     0                       Success
 *      closef_locked:???               Anything returnable by a per-fileops
 *                                      close function
 *
 * Locks:       Assumes proc_fdlock for process is held by the caller and returns
 *              with lock held
 *
 * Notes:       This function may drop and reacquire this lock; it is unsafe
 *              for a caller to assume that other state protected by the lock
 *              has not been subsequently changed out from under it.  The lock
 *              is dropped around the kauth/AIO callouts and around
 *              fileproc_free().
 *
 *              Being called on a fileproc that already has FP_CLOSING set
 *              is treated as fatal (panic); no path in this function
 *              returns EBADF itself.
 *
 *              fp is handed to fileproc_free() before returning, so the
 *              caller must treat it as consumed.
 */
int
close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
{
        struct filedesc *fdp = p->p_fd;
        int error =0;
        int resvfd = flags & FD_DUP2RESV;       /* nonzero: keep fd reserved */


#if DIAGNOSTIC
        proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

        /* Keep people from using the filedesc while we are closing it */
        procfdtbl_markclosefd(p, fd);


        /* A second close of the same fileproc is a fatal logic error. */
        if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
                panic("close_internal_locked: being called on already closing fd");
        }


#if DIAGNOSTIC
        if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
                panic("close_internal: unreserved fileflags with fd %d", fd);
#endif

        fp->f_flags |= FP_CLOSING;

        /*
         * The callouts below run without the fd lock, so only drop it when
         * at least one of them can actually apply.
         */
        if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) {

                proc_fdunlock(p);

                if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
                        /*
                         * call out to allow 3rd party notification of close.
                         * Ignore result of kauth_authorize_fileop call.
                         */
                        if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
                                u_int   fileop_flags = 0;
                                /* Tell listeners whether the file was written. */
                                if ((fp->f_flags & FP_WRITTEN) != 0)
                                        fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
                                kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
                                                       (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
                                vnode_put((vnode_t)fp->f_data);
                        }
                }
                if (fp->f_flags & FP_AIOISSUED)
                        /*
                         * cancel all async IO requests that can be cancelled.
                         */
                        _aio_close( p, fd );

                proc_fdlock(p);
        }

        if (fd < fdp->fd_knlistsize)
                knote_fdclose(p, fd);

        if (fp->f_flags & FP_WAITEVENT)
                (void)waitevent_close(p, fp);

        fileproc_drain(p, fp);

        /* Either give the slot back or keep it reserved (FD_DUP2RESV). */
        if (resvfd == 0) {
                _fdrelse(p, fd);
        } else {
                procfdtbl_reservefd(p, fd);
        }

        /* The value returned by closef_locked() is this function's result. */
        error = closef_locked(fp, fp->f_fglob, p);
        if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
                wakeup(&fp->f_flags);
        fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);

        /* fileproc_free() is called with the fd lock dropped. */
        proc_fdunlock(p);

        fileproc_free(fp);

        proc_fdlock(p);

#if DIAGNOSTIC
        if (resvfd != 0) {
                if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
                        panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
        }
#endif

        return(error);
}
2782
2783
2784 /*
2785  * fstat1
2786  *
2787  * Description: Return status information about a file descriptor.
2788  *
2789  * Parameters:  p                               The process doing the fstat
2790  *              fd                              The fd to stat
2791  *              ub                              The user stat buffer
2792  *              xsecurity                       The user extended security
2793  *                                              buffer, or 0 if none
2794  *              xsecurity_size                  The size of xsecurity, or 0
2795  *                                              if no xsecurity
2796  *              isstat64                        Flag to indicate 64 bit version
2797  *                                              for inode size, etc.
2798  *
2799  * Returns:     0                               Success
2800  *              EBADF
2801  *              EFAULT
2802  *      fp_lookup:EBADF                         Bad file descriptor
2803  *      vnode_getwithref:???
2804  *      copyout:EFAULT
2805  *      vnode_getwithref:???
2806  *      vn_stat:???
2807  *      soo_stat:???
2808  *      pipe_stat:???
2809  *      pshm_stat:???
2810  *      kqueue_stat:???
2811  *
2812  * Notes:       Internal implementation for all other fstat() related
2813  *              functions
2814  *
2815  *              XXX switch on node type is bogus; need a stat in struct
2816  *              XXX fileops instead.
2817  */
2818 static int
2819 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
2820 {
2821         struct fileproc *fp;
2822         union {
2823                 struct stat sb;
2824                 struct stat64 sb64;
2825         } source;
2826         union {
2827                 struct user64_stat user64_sb;
2828                 struct user32_stat user32_sb;
2829                 struct user64_stat64 user64_sb64;
2830                 struct user32_stat64 user32_sb64;
2831         } dest;
2832         int error, my_size;
2833         file_type_t type;
2834         caddr_t data;
2835         kauth_filesec_t fsec;
2836         user_size_t xsecurity_bufsize;
2837         vfs_context_t ctx = vfs_context_current();
2838         void * sbptr;
2839
2840
2841         AUDIT_ARG(fd, fd);
2842
2843         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
2844                 return(error);
2845         }
2846         type = fp->f_type;
2847         data = fp->f_data;
2848         fsec = KAUTH_FILESEC_NONE;
2849
2850         sbptr = (void *)&source;
2851
2852         switch (type) {
2853
2854         case DTYPE_VNODE:
2855                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
2856                         /*
2857                          * If the caller has the file open, and is not
2858                          * requesting extended security information, we are
2859                          * going to let them get the basic stat information.
2860                          */
2861                         if (xsecurity == USER_ADDR_NULL) {
2862                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx);
2863                         } else {
2864                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
2865                         }
2866
2867                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
2868                         (void)vnode_put((vnode_t)data);
2869                 }
2870                 break;
2871
2872 #if SOCKETS
2873         case DTYPE_SOCKET:
2874                 error = soo_stat((struct socket *)data, sbptr, isstat64);
2875                 break;
2876 #endif /* SOCKETS */
2877
2878         case DTYPE_PIPE:
2879                 error = pipe_stat((void *)data, sbptr, isstat64);
2880                 break;
2881
2882         case DTYPE_PSXSHM:
2883                 error = pshm_stat((void *)data, sbptr, isstat64);
2884                 break;
2885
2886         case DTYPE_KQUEUE:
2887                 error = kqueue_stat((void *)data, sbptr, isstat64, p);
2888                 break;
2889
2890         default:
2891                 error = EBADF;
2892                 goto out;
2893         }
2894         if (error == 0) {
2895                 caddr_t sbp;
2896
2897                 if (isstat64 != 0) {
2898                         source.sb64.st_lspare = 0;
2899                         source.sb64.st_qspare[0] = 0LL;
2900                         source.sb64.st_qspare[1] = 0LL;
2901
2902                         if (IS_64BIT_PROCESS(current_proc())) {
2903                                 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
2904                                 my_size = sizeof(dest.user64_sb64);
2905                                 sbp = (caddr_t)&dest.user64_sb64;
2906                         } else {
2907                                 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
2908                                 my_size = sizeof(dest.user32_sb64);
2909                                 sbp = (caddr_t)&dest.user32_sb64;
2910                         }
2911                 } else {
2912                         source.sb.st_lspare = 0;
2913                         source.sb.st_qspare[0] = 0LL;
2914                         source.sb.st_qspare[1] = 0LL;
2915                         if (IS_64BIT_PROCESS(current_proc())) {
2916                                 munge_user64_stat(&source.sb, &dest.user64_sb);
2917                                 my_size = sizeof(dest.user64_sb);
2918                                 sbp = (caddr_t)&dest.user64_sb;
2919                         } else {
2920                                 munge_user32_stat(&source.sb, &dest.user32_sb);
2921                                 my_size = sizeof(dest.user32_sb);
2922                                 sbp = (caddr_t)&dest.user32_sb;
2923                         }
2924                 }
2925
2926                 error = copyout(sbp, ub, my_size);
2927         }
2928
2929         /* caller wants extended security information? */
2930         if (xsecurity != USER_ADDR_NULL) {
2931
2932                 /* did we get any? */
2933                  if (fsec == KAUTH_FILESEC_NONE) {
2934                         if (susize(xsecurity_size, 0) != 0) {
2935                                 error = EFAULT;
2936                                 goto out;
2937                         }
2938                 } else {
2939                         /* find the user buffer size */
2940                         xsecurity_bufsize = fusize(xsecurity_size);
2941
2942                         /* copy out the actual data size */
2943                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
2944                                 error = EFAULT;
2945                                 goto out;
2946                         }
2947
2948                         /* if the caller supplied enough room, copy out to it */
2949                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
2950                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
2951                 }
2952         }
2953 out:
2954         fp_drop(p, fd, fp, 0);
2955         if (fsec != NULL)
2956                 kauth_filesec_free(fsec);
2957         return (error);
2958 }
2959
2960
2961 /*
2962  * fstat_extended
2963  *
2964  * Description: Extended version of fstat supporting returning extended
2965  *              security information
2966  *
2967  * Parameters:  p                               The process doing the fstat
2968  *              uap->fd                         The fd to stat
2969  *              uap->ub                         The user stat buffer
2970  *              uap->xsecurity                  The user extended security
2971  *                                              buffer, or 0 if none
2972  *              uap->xsecurity_size             The size of xsecurity, or 0
2973  *
2974  * Returns:     0                               Success
2975  *              !0                              Errno (see fstat1)
2976  */
2977 int
2978 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
2979 {
2980         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
2981 }
2982
2983
2984 /*
2985  * fstat
2986  *
2987  * Description: Get file status for the file associated with fd
2988  *
2989  * Parameters:  p                               The process doing the fstat
2990  *              uap->fd                         The fd to stat
2991  *              uap->ub                         The user stat buffer
2992  *
2993  * Returns:     0                               Success
2994  *              !0                              Errno (see fstat1)
2995  */
2996 int
2997 fstat(proc_t p, register struct fstat_args *uap, __unused int32_t *retval)
2998 {
2999         return(fstat1(p, uap->fd, uap->ub, 0, 0, 0));
3000 }
3001
3002
3003 /*
3004  * fstat64_extended
3005  *
3006  * Description: Extended version of fstat64 supporting returning extended
3007  *              security information
3008  *
3009  * Parameters:  p                               The process doing the fstat
3010  *              uap->fd                         The fd to stat
3011  *              uap->ub                         The user stat buffer
3012  *              uap->xsecurity                  The user extended security
3013  *                                              buffer, or 0 if none
3014  *              uap->xsecurity_size             The size of xsecurity, or 0
3015  *
3016  * Returns:     0                               Success
3017  *              !0                              Errno (see fstat1)
3018  */
3019 int
3020 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3021 {
3022         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3023 }
3024
3025
3026 /*
3027  * fstat64
3028  *
3029  * Description: Get 64 bit version of the file status for the file associated
3030  *              with fd
3031  *
3032  * Parameters:  p                               The process doing the fstat
3033  *              uap->fd                         The fd to stat
3034  *              uap->ub                         The user stat buffer
3035  *
3036  * Returns:     0                               Success
3037  *              !0                              Errno (see fstat1)
3038  */
3039 int
3040 fstat64(proc_t p, register struct fstat64_args *uap, __unused int32_t *retval)
3041 {
3042         return(fstat1(p, uap->fd, uap->ub, 0, 0, 1));
3043 }
3044
3045
3046 /*
3047  * fpathconf
3048  *
3049  * Description: Return pathconf information about a file descriptor.
3050  *
3051  * Parameters:  p                               Process making the request
3052  *              uap->fd                         fd to get information about
3053  *              uap->name                       Name of information desired
3054  *              retval                          Pointer to the call return area
3055  *
3056  * Returns:     0                               Success
3057  *              EINVAL
3058  *      fp_lookup:EBADF                         Bad file descriptor
3059  *      vnode_getwithref:???
3060  *      vn_pathconf:???
3061  *
3062  * Implicit returns:
3063  *              *retval (modified)              Returned information (numeric)
3064  */
3065 int
3066 fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3067 {
3068         int fd = uap->fd;
3069         struct fileproc *fp;
3070         struct vnode *vp;
3071         int error = 0;
3072         file_type_t type;
3073         caddr_t data;
3074
3075
3076         AUDIT_ARG(fd, uap->fd);
3077         if ( (error = fp_lookup(p, fd, &fp, 0)) )
3078                 return(error);
3079         type = fp->f_type;
3080         data = fp->f_data;
3081
3082         switch (type) {
3083
3084         case DTYPE_SOCKET:
3085                 if (uap->name != _PC_PIPE_BUF) {
3086                         error = EINVAL;
3087                         goto out;
3088                 }
3089                 *retval = PIPE_BUF;
3090                 error = 0;
3091                 goto out;
3092
3093         case DTYPE_PIPE:
3094                 if (uap->name != _PC_PIPE_BUF) {
3095                         error = EINVAL;
3096                         goto out;
3097                 }
3098                 *retval = PIPE_BUF;
3099                 error = 0;
3100                 goto out;
3101
3102         case DTYPE_VNODE:
3103                 vp = (struct vnode *)data;
3104
3105                 if ( (error = vnode_getwithref(vp)) == 0) {
3106                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3107
3108                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3109
3110                         (void)vnode_put(vp);
3111                 }
3112                 goto out;
3113
3114         default:
3115                 error = EINVAL;
3116                 goto out;
3117
3118         }
3119         /*NOTREACHED*/
3120 out:
3121         fp_drop(p, fd, fp, 0);
3122         return(error);
3123 }
3124
/*
 * fdexpand
 *
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.  It is
 * a single counter shared by all processes; fdalloc() increments it once
 * after each table replacement.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
3132
3133
3134 /*
3135  * fdalloc
3136  *
3137  * Description: Allocate a file descriptor for the process.
3138  *
3139  * Parameters:  p                               Process to allocate the fd in
3140  *              want                            The fd we would prefer to get
3141  *              result                          Pointer to fd we got
3142  *
3143  * Returns:     0                               Success
3144  *              EMFILE
3145  *              ENOMEM
3146  *
3147  * Implicit returns:
3148  *              *result (modified)              The fd which was allocated
3149  */
3150 int
3151 fdalloc(proc_t p, int want, int *result)
3152 {
3153         struct filedesc *fdp = p->p_fd;
3154         int i;
3155         int lim, last, numfiles, oldnfiles;
3156         struct fileproc **newofiles, **ofiles;
3157         char *newofileflags;
3158
3159         /*
3160          * Search for a free descriptor starting at the higher
3161          * of want or fd_freefile.  If that fails, consider
3162          * expanding the ofile array.
3163          */
3164 #if DIAGNOSTIC
3165         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3166 #endif
3167
3168         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3169         for (;;) {
3170                 last = min(fdp->fd_nfiles, lim);
3171                 if ((i = want) < fdp->fd_freefile)
3172                         i = fdp->fd_freefile;
3173                 for (; i < last; i++) {
3174                         if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3175                                 procfdtbl_reservefd(p, i);
3176                                 if (i > fdp->fd_lastfile)
3177                                         fdp->fd_lastfile = i;
3178                                 if (want <= fdp->fd_freefile)
3179                                         fdp->fd_freefile = i;
3180                                 *result = i;
3181                                 return (0);
3182                         }
3183                 }
3184
3185                 /*
3186                  * No space in current array.  Expand?
3187                  */
3188                 if (fdp->fd_nfiles >= lim)
3189                         return (EMFILE);
3190                 if (fdp->fd_nfiles < NDEXTENT)
3191                         numfiles = NDEXTENT;
3192                 else
3193                         numfiles = 2 * fdp->fd_nfiles;
3194                 /* Enforce lim */
3195                 if (numfiles > lim)
3196                         numfiles = lim;
3197                 proc_fdunlock(p);
3198                 MALLOC_ZONE(newofiles, struct fileproc **,
3199                                 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3200                 proc_fdlock(p);
3201                 if (newofiles == NULL) {
3202                         return (ENOMEM);
3203                 }
3204                 if (fdp->fd_nfiles >= numfiles) {
3205                         FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
3206                         continue;
3207                 }
3208                 newofileflags = (char *) &newofiles[numfiles];
3209                 /*
3210                  * Copy the existing ofile and ofileflags arrays
3211                  * and zero the new portion of each array.
3212                  */
3213                 oldnfiles = fdp->fd_nfiles;
3214                 (void) memcpy(newofiles, fdp->fd_ofiles,
3215                                 oldnfiles * sizeof(*fdp->fd_ofiles));
3216                 (void) memset(&newofiles[oldnfiles], 0,
3217                                 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3218
3219                 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3220                                 oldnfiles * sizeof(*fdp->fd_ofileflags));
3221                 (void) memset(&newofileflags[oldnfiles], 0,
3222                                 (numfiles - oldnfiles) *
3223                                                 sizeof(*fdp->fd_ofileflags));
3224                 ofiles = fdp->fd_ofiles;
3225                 fdp->fd_ofiles = newofiles;
3226                 fdp->fd_ofileflags = newofileflags;
3227                 fdp->fd_nfiles = numfiles;
3228                 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
3229                 fdexpand++;
3230         }
3231 }
3232
3233
3234 /*
3235  * fdavail
3236  *
3237  * Description: Check to see whether n user file descriptors are available
3238  *              to the process p.
3239  *
3240  * Parameters:  p                               Process to check in
3241  *              n                               The number of fd's desired
3242  *
3243  * Returns:     0                               No
3244  *              1                               Yes
3245  *
3246  * Locks:       Assumes proc_fdlock for process is held by the caller
3247  *
3248  * Notes:       The answer only remains valid so long as the proc_fdlock is
3249  *              held by the caller.
3250  */
3251 int
3252 fdavail(proc_t p, int n)
3253 {
3254         struct filedesc *fdp = p->p_fd;
3255         struct fileproc **fpp;
3256         char *flags;
3257         int i, lim;
3258
3259         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3260         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
3261                 return (1);
3262         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3263         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3264         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
3265                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
3266                         return (1);
3267         return (0);
3268 }
3269
3270
3271 /*
3272  * fdrelse
3273  *
3274  * Description: Legacy KPI wrapper function for _fdrelse
3275  *
3276  * Parameters:  p                               Process in which fd lives
3277  *              fd                              fd to free
3278  *
3279  * Returns:     void
3280  *
3281  * Locks:       Assumes proc_fdlock for process is held by the caller
3282  */
3283 void
3284 fdrelse(proc_t p, int fd)
3285 {
3286         _fdrelse(p, fd);
3287 }
3288
3289
3290 /*
3291  * fdgetf_noref
3292  *
3293  * Description: Get the fileproc pointer for the given fd from the per process
3294  *              open file table without taking an explicit reference on it.
3295  *
3296  * Parameters:  p                               Process containing fd
3297  *              fd                              fd to obtain fileproc for
3298  *              resultfp                        Pointer to pointer return area
3299  *
3300  * Returns:     0                               Success
3301  *              EBADF
3302  *
3303  * Implicit returns:
3304  *              *resultfp (modified)            Pointer to fileproc pointer
3305  *
3306  * Locks:       Assumes proc_fdlock for process is held by the caller
3307  *
3308  * Notes:       Because there is no reference explicitly taken, the returned
3309  *              fileproc pointer is only valid so long as the proc_fdlock
3310  *              remains held by the caller.
3311  */
3312 int
3313 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
3314 {
3315         struct filedesc *fdp = p->p_fd;
3316         struct fileproc *fp;
3317
3318         if (fd < 0 || fd >= fdp->fd_nfiles ||
3319                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3320                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3321                 return (EBADF);
3322         }
3323         if (resultfp)
3324                 *resultfp = fp;
3325         return (0);
3326 }
3327
3328
3329 /*
3330  * fp_getfvp
3331  *
3332  * Description: Get fileproc and vnode pointer for a given fd from the per
3333  *              process open file table of the specified process, and if
3334  *              successful, increment the f_iocount
3335  *
3336  * Parameters:  p                               Process in which fd lives
3337  *              fd                              fd to get information for
3338  *              resultfp                        Pointer to result fileproc
3339  *                                              pointer area, or 0 if none
3340  *              resultvp                        Pointer to result vnode pointer
3341  *                                              area, or 0 if none
3342  *
3343  * Returns:     0                               Success
3344  *              EBADF                           Bad file descriptor
3345  *              ENOTSUP                         fd does not refer to a vnode
3346  *
3347  * Implicit returns:
3348  *              *resultfp (modified)            Fileproc pointer
3349  *              *resultvp (modified)            vnode pointer
3350  *
3351  * Notes:       The resultfp and resultvp fields are optional, and may be
3352  *              independently specified as NULL to skip returning information
3353  *
3354  * Locks:       Internally takes and releases proc_fdlock
3355  */
3356 int
3357 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
3358 {
3359         struct filedesc *fdp = p->p_fd;
3360         struct fileproc *fp;
3361
3362         proc_fdlock_spin(p);
3363         if (fd < 0 || fd >= fdp->fd_nfiles ||
3364                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3365                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3366                 proc_fdunlock(p);
3367                 return (EBADF);
3368         }
3369         if (fp->f_type != DTYPE_VNODE) {
3370                 proc_fdunlock(p);
3371                 return(ENOTSUP);
3372         }
3373         fp->f_iocount++;
3374
3375         if (resultfp)
3376                 *resultfp = fp;
3377         if (resultvp)
3378                 *resultvp = (struct vnode *)fp->f_data;
3379         proc_fdunlock(p);
3380
3381         return (0);
3382 }
3383
3384
3385 /*
3386  * fp_getfvpandvid
3387  *
3388  * Description: Get fileproc, vnode pointer, and vid for a given fd from the
3389  *              per process open file table of the specified process, and if
3390  *              successful, increment the f_iocount
3391  *
3392  * Parameters:  p                               Process in which fd lives
3393  *              fd                              fd to get information for
3394  *              resultfp                        Pointer to result fileproc
3395  *                                              pointer area, or 0 if none
3396  *              resultvp                        Pointer to result vnode pointer
3397  *                                              area, or 0 if none
3398  *              vidp                            Pointer to resuld vid area
3399  *
3400  * Returns:     0                               Success
3401  *              EBADF                           Bad file descriptor
3402  *              ENOTSUP                         fd does not refer to a vnode
3403  *
3404  * Implicit returns:
3405  *              *resultfp (modified)            Fileproc pointer
3406  *              *resultvp (modified)            vnode pointer
3407  *              *vidp                           vid value
3408  *
3409  * Notes:       The resultfp and resultvp fields are optional, and may be
3410  *              independently specified as NULL to skip returning information
3411  *
3412  * Locks:       Internally takes and releases proc_fdlock
3413  */
3414 int
3415 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3416                 struct vnode **resultvp, uint32_t *vidp)
3417 {
3418         struct filedesc *fdp = p->p_fd;
3419         struct fileproc *fp;
3420
3421         proc_fdlock_spin(p);
3422         if (fd < 0 || fd >= fdp->fd_nfiles ||
3423                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3424                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3425                 proc_fdunlock(p);
3426                 return (EBADF);
3427         }
3428         if (fp->f_type != DTYPE_VNODE) {
3429                 proc_fdunlock(p);
3430                 return(ENOTSUP);
3431         }
3432         fp->f_iocount++;
3433
3434         if (resultfp)
3435                 *resultfp = fp;
3436         if (resultvp)
3437                 *resultvp = (struct vnode *)fp->f_data;
3438         if (vidp)
3439                 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3440         proc_fdunlock(p);
3441
3442         return (0);
3443 }
3444
3445
3446 /*
3447  * fp_getfsock
3448  *
3449  * Description: Get fileproc and socket pointer for a given fd from the
3450  *              per process open file table of the specified process, and if
3451  *              successful, increment the f_iocount
3452  *
3453  * Parameters:  p                               Process in which fd lives
3454  *              fd                              fd to get information for
3455  *              resultfp                        Pointer to result fileproc
3456  *                                              pointer area, or 0 if none
3457  *              results                         Pointer to result socket
3458  *                                              pointer area, or 0 if none
3459  *
3460  * Returns:     EBADF                   The file descriptor is invalid
3461  *              EOPNOTSUPP              The file descriptor is not a socket
3462  *              0                       Success
3463  *
3464  * Implicit returns:
3465  *              *resultfp (modified)            Fileproc pointer
3466  *              *results (modified)             socket pointer
3467  *
3468  * Notes:       EOPNOTSUPP should probably be ENOTSOCK; this function is only
3469  *              ever called from accept1().
3470  */
3471 int
3472 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3473             struct socket **results)
3474 {
3475         struct filedesc *fdp = p->p_fd;
3476         struct fileproc *fp;
3477
3478         proc_fdlock_spin(p);
3479         if (fd < 0 || fd >= fdp->fd_nfiles ||
3480                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3481                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3482                 proc_fdunlock(p);
3483                 return (EBADF);
3484         }
3485         if (fp->f_type != DTYPE_SOCKET) {
3486                 proc_fdunlock(p);
3487                 return(EOPNOTSUPP);
3488         }
3489         fp->f_iocount++;
3490
3491         if (resultfp)
3492                 *resultfp = fp;
3493         if (results)
3494                 *results = (struct socket *)fp->f_data;
3495         proc_fdunlock(p);
3496
3497         return (0);
3498 }
3499
3500
3501 /*
3502  * fp_getfkq
3503  *
3504  * Description: Get fileproc and kqueue pointer for a given fd from the
3505  *              per process open file table of the specified process, and if
3506  *              successful, increment the f_iocount
3507  *
3508  * Parameters:  p                               Process in which fd lives
3509  *              fd                              fd to get information for
3510  *              resultfp                        Pointer to result fileproc
3511  *                                              pointer area, or 0 if none
3512  *              resultkq                        Pointer to result kqueue
3513  *                                              pointer area, or 0 if none
3514  *
3515  * Returns:     EBADF                   The file descriptor is invalid
3516  *              EBADF                   The file descriptor is not a socket
3517  *              0                       Success
3518  *
3519  * Implicit returns:
3520  *              *resultfp (modified)            Fileproc pointer
3521  *              *resultkq (modified)            kqueue pointer
3522  *
3523  * Notes:       The second EBADF should probably be something else to make
3524  *              the error condition distinct.
3525  */
3526 int
3527 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3528           struct kqueue **resultkq)
3529 {
3530         struct filedesc *fdp = p->p_fd;
3531         struct fileproc *fp;
3532
3533         proc_fdlock_spin(p);
3534         if ( fd < 0 || fd >= fdp->fd_nfiles ||
3535                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3536                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3537                 proc_fdunlock(p);
3538                 return (EBADF);
3539         }
3540         if (fp->f_type != DTYPE_KQUEUE) {
3541                 proc_fdunlock(p);
3542                 return(EBADF);
3543         }
3544         fp->f_iocount++;
3545
3546         if (resultfp)
3547                 *resultfp = fp;
3548         if (resultkq)
3549                 *resultkq = (struct kqueue *)fp->f_data;
3550         proc_fdunlock(p);
3551
3552         return (0);
3553 }
3554
3555
3556 /*
3557  * fp_getfpshm
3558  *
3559  * Description: Get fileproc and POSIX shared memory pointer for a given fd
3560  *              from the per process open file table of the specified process
3561  *              and if successful, increment the f_iocount
3562  *
3563  * Parameters:  p                               Process in which fd lives
3564  *              fd                              fd to get information for
3565  *              resultfp                        Pointer to result fileproc
3566  *                                              pointer area, or 0 if none
3567  *              resultpshm                      Pointer to result POSIX
3568  *                                              shared memory pointer
3569  *                                              pointer area, or 0 if none
3570  *
3571  * Returns:     EBADF                   The file descriptor is invalid
3572  *              EBADF                   The file descriptor is not a POSIX
3573  *                                      shared memory area
3574  *              0                       Success
3575  *
3576  * Implicit returns:
3577  *              *resultfp (modified)            Fileproc pointer
3578  *              *resultpshm (modified)          POSIX shared memory pointer
3579  *
3580  * Notes:       The second EBADF should probably be something else to make
3581  *              the error condition distinct.
3582  */
3583 int
3584 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3585             struct pshmnode **resultpshm)
3586 {
3587         struct filedesc *fdp = p->p_fd;
3588         struct fileproc *fp;
3589
3590         proc_fdlock_spin(p);
3591         if (fd < 0 || fd >= fdp->fd_nfiles ||
3592                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3593                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3594                 proc_fdunlock(p);
3595                 return (EBADF);
3596         }
3597         if (fp->f_type != DTYPE_PSXSHM) {
3598
3599                 proc_fdunlock(p);
3600                 return(EBADF);
3601         }
3602         fp->f_iocount++;
3603
3604         if (resultfp)
3605                 *resultfp = fp;
3606         if (resultpshm)
3607                 *resultpshm = (struct pshmnode *)fp->f_data;
3608         proc_fdunlock(p);
3609
3610         return (0);
3611 }
3612
3613
3614 /*
3615  * fp_getfsem
3616  *
3617  * Description: Get fileproc and POSIX semaphore pointer for a given fd from
3618  *              the per process open file table of the specified process
3619  *              and if successful, increment the f_iocount
3620  *
3621  * Parameters:  p                               Process in which fd lives
3622  *              fd                              fd to get information for
3623  *              resultfp                        Pointer to result fileproc
3624  *                                              pointer area, or 0 if none
3625  *              resultpsem                      Pointer to result POSIX
3626  *                                              semaphore pointer area, or
3627  *                                              0 if none
3628  *
3629  * Returns:     EBADF                   The file descriptor is invalid
3630  *              EBADF                   The file descriptor is not a POSIX
3631  *                                      semaphore
3632  *              0                       Success
3633  *
3634  * Implicit returns:
3635  *              *resultfp (modified)            Fileproc pointer
3636  *              *resultpsem (modified)          POSIX semaphore pointer
3637  *
3638  * Notes:       The second EBADF should probably be something else to make
3639  *              the error condition distinct.
3640  *
3641  *              In order to support unnamed POSIX semaphores, the named
3642  *              POSIX semaphores will have to move out of the per-process
3643  *              open filetable, and into a global table that is shared with
3644  *              unnamed POSIX semaphores, since unnamed POSIX semaphores
3645  *              are typically used by declaring instances in shared memory,
3646  *              and there's no other way to do this without changing the
3647  *              underlying type, which would introduce binary compatibility
3648  *              issues.
3649  */
3650 int
3651 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3652             struct psemnode **resultpsem)
3653 {
3654         struct filedesc *fdp = p->p_fd;
3655         struct fileproc *fp;
3656
3657         proc_fdlock_spin(p);
3658         if (fd < 0 || fd >= fdp->fd_nfiles ||
3659                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3660                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3661                 proc_fdunlock(p);
3662                 return (EBADF);
3663         }
3664         if (fp->f_type != DTYPE_PSXSEM) {
3665                 proc_fdunlock(p);
3666                 return(EBADF);
3667         }
3668         fp->f_iocount++;
3669
3670         if (resultfp)
3671                 *resultfp = fp;
3672         if (resultpsem)
3673                 *resultpsem = (struct psemnode *)fp->f_data;
3674         proc_fdunlock(p);
3675
3676         return (0);
3677 }
3678
3679
3680 /*
3681  * fp_getfpipe
3682  *
3683  * Description: Get fileproc and pipe pointer for a given fd from the
3684  *              per process open file table of the specified process
3685  *              and if successful, increment the f_iocount
3686  *
3687  * Parameters:  p                               Process in which fd lives
3688  *              fd                              fd to get information for
3689  *              resultfp                        Pointer to result fileproc
3690  *                                              pointer area, or 0 if none
3691  *              resultpipe                      Pointer to result pipe
3692  *                                              pointer area, or 0 if none
3693  *
3694  * Returns:     EBADF                   The file descriptor is invalid
3695  *              EBADF                   The file descriptor is not a socket
3696  *              0                       Success
3697  *
3698  * Implicit returns:
3699  *              *resultfp (modified)            Fileproc pointer
3700  *              *resultpipe (modified)          pipe pointer
3701  *
3702  * Notes:       The second EBADF should probably be something else to make
3703  *              the error condition distinct.
3704  */
3705 int
3706 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
3707             struct pipe **resultpipe)
3708 {
3709         struct filedesc *fdp = p->p_fd;
3710         struct fileproc *fp;
3711
3712         proc_fdlock_spin(p);
3713         if (fd < 0 || fd >= fdp->fd_nfiles ||
3714                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3715                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3716                 proc_fdunlock(p);
3717                 return (EBADF);
3718         }
3719         if (fp->f_type != DTYPE_PIPE) {
3720                 proc_fdunlock(p);
3721                 return(EBADF);
3722         }
3723         fp->f_iocount++;
3724
3725         if (resultfp)
3726                 *resultfp = fp;
3727         if (resultpipe)
3728                 *resultpipe = (struct pipe *)fp->f_data;
3729         proc_fdunlock(p);
3730
3731         return (0);
3732 }
3733
3734 /*
3735  * fp_lookup
3736  *
3737  * Description: Get fileproc pointer for a given fd from the per process
3738  *              open file table of the specified process and if successful,
3739  *              increment the f_iocount
3740  *
3741  * Parameters:  p                               Process in which fd lives
3742  *              fd                              fd to get information for
3743  *              resultfp                        Pointer to result fileproc
3744  *                                              pointer area, or 0 if none
3745  *              locked                          !0 if the caller holds the
3746  *                                              proc_fdlock, 0 otherwise
3747  *
3748  * Returns:     0                       Success
3749  *              EBADF                   Bad file descriptor
3750  *
3751  * Implicit returns:
3752  *              *resultfp (modified)            Fileproc pointer
3753  *
3754  * Locks:       If the argument 'locked' is non-zero, then the caller is
3755  *              expected to have taken and held the proc_fdlock; if it is
3756  *              zero, than this routine internally takes and drops this lock.
3757  */
3758 int
3759 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
3760 {
3761         struct filedesc *fdp = p->p_fd;
3762         struct fileproc *fp;
3763
3764         if (!locked)
3765                 proc_fdlock_spin(p);
3766         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
3767                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3768                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3769                 if (!locked)
3770                         proc_fdunlock(p);
3771                 return (EBADF);
3772         }
3773         fp->f_iocount++;
3774
3775         if (resultfp)
3776                 *resultfp = fp;
3777         if (!locked)
3778                 proc_fdunlock(p);
3779
3780         return (0);
3781 }
3782
3783
3784 /*
3785  * fp_tryswap
3786  *
3787  * Description: Swap the fileproc pointer for a given fd with a new
3788  *              fileproc pointer in the per-process open file table of
3789  *              the specified process.  The fdlock must be held at entry.
3790  *
3791  * Parameters:  p               Process containing the fd
3792  *              fd              The fd of interest
3793  *              nfp             Pointer to the newfp
3794  *
3795  * Returns:     0               Success
3796  *              EBADF           Bad file descriptor
3797  *              EINTR           Interrupted
3798  *              EKEEPLOOKING    f_iocount changed while lock was dropped.
3799  */
3800 int
3801 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
3802 {
3803         struct fileproc *fp;
3804         int error;
3805
3806         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3807
3808         if (0 != (error = fp_lookup(p, fd, &fp, 1)))
3809                 return (error);
3810         /*
3811          * At this point, our caller (change_guardedfd_np) has
3812          * one f_iocount reference, and we just took another
3813          * one to begin the replacement.
3814          */
3815         if (fp->f_iocount < 2) {
3816                 panic("f_iocount too small %d", fp->f_iocount);
3817         } else if (2 == fp->f_iocount) {
3818
3819                 /* Copy the contents of *fp, preserving the "type" of *nfp */
3820
3821                 nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) |
3822                         (fp->f_flags & ~FP_TYPEMASK);
3823                 nfp->f_iocount = fp->f_iocount;
3824                 nfp->f_fglob = fp->f_fglob;
3825                 nfp->f_waddr = fp->f_waddr;
3826
3827                 p->p_fd->fd_ofiles[fd] = nfp;
3828                 (void) fp_drop(p, fd, nfp, 1);
3829         } else {
3830                 /*
3831                  * Wait for all other active references to evaporate.
3832                  */
3833                 p->p_fpdrainwait = 1;
3834                 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
3835                     PRIBIO | PCATCH, "tryswap fpdrain", NULL);
3836                 if (0 == error) {
3837                         /*
3838                          * Return an "internal" errno to trigger a full
3839                          * reevaluation of the change-guard attempt.
3840                          */
3841                         error = EKEEPLOOKING;
3842                         printf("%s: lookup collision fd %d\n", __func__, fd);
3843                 }
3844                 (void) fp_drop(p, fd, fp, 1);
3845         }
3846         return (error);
3847 }
3848
3849
3850 /*
3851  * fp_drop_written
3852  *
3853  * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
3854  *              reference previously taken by calling fp_lookup et. al.
3855  *
3856  * Parameters:  p                               Process in which the fd lives
3857  *              fd                              fd associated with the fileproc
3858  *              fp                              fileproc on which to set the
3859  *                                              flag and drop the reference
3860  *
3861  * Returns:     0                               Success
3862  *      fp_drop:EBADF                           Bad file descriptor
3863  *
3864  * Locks:       This function internally takes and drops the proc_fdlock for
3865  *              the supplied process
3866  *
3867  * Notes:       The fileproc must correspond to the fd in the supplied proc
3868  */
3869 int
3870 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
3871 {
3872         int error;
3873
3874         proc_fdlock_spin(p);
3875
3876         fp->f_flags |= FP_WRITTEN;
3877
3878         error = fp_drop(p, fd, fp, 1);
3879
3880         proc_fdunlock(p);
3881
3882         return (error);
3883 }
3884
3885
3886 /*
3887  * fp_drop_event
3888  *
3889  * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
3890  *              reference previously taken by calling fp_lookup et. al.
3891  *
3892  * Parameters:  p                               Process in which the fd lives
3893  *              fd                              fd associated with the fileproc
3894  *              fp                              fileproc on which to set the
3895  *                                              flag and drop the reference
3896  *
3897  * Returns:     0                               Success
3898  *      fp_drop:EBADF                           Bad file descriptor
3899  *
3900  * Locks:       This function internally takes and drops the proc_fdlock for
3901  *              the supplied process
3902  *
3903  * Notes:       The fileproc must correspond to the fd in the supplied proc
3904  */
3905 int
3906 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
3907 {
3908         int error;
3909
3910         proc_fdlock_spin(p);
3911
3912         fp->f_flags |= FP_WAITEVENT;
3913
3914         error = fp_drop(p, fd, fp, 1);
3915
3916         proc_fdunlock(p);
3917
3918         return (error);
3919 }
3920
3921
3922 /*
3923  * fp_drop
3924  *
3925  * Description: Drop the I/O reference previously taken by calling fp_lookup
3926  *              et. al.
3927  *
3928  * Parameters:  p                               Process in which the fd lives
3929  *              fd                              fd associated with the fileproc
3930  *              fp                              fileproc on which to set the
3931  *                                              flag and drop the reference
3932  *              locked                          flag to internally take and
3933  *                                              drop proc_fdlock if it is not
3934  *                                              already held by the caller
3935  *
3936  * Returns:     0                               Success
3937  *              EBADF                           Bad file descriptor
3938  *
3939  * Locks:       This function internally takes and drops the proc_fdlock for
3940  *              the supplied process if 'locked' is non-zero, and assumes that
3941  *              the caller already holds this lock if 'locked' is non-zero.
3942  *
3943  * Notes:       The fileproc must correspond to the fd in the supplied proc
3944  */
3945 int
3946 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
3947 {
3948         struct filedesc *fdp = p->p_fd;
3949         int     needwakeup = 0;
3950
3951         if (!locked)
3952                 proc_fdlock_spin(p);
3953          if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
3954                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3955                         ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3956                          !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
3957                 if (!locked)
3958                         proc_fdunlock(p);
3959                 return (EBADF);
3960         }
3961         fp->f_iocount--;
3962
3963         if (fp->f_iocount == 0) {
3964                 if (fp->f_flags & FP_SELCONFLICT)
3965                         fp->f_flags &= ~FP_SELCONFLICT;
3966
3967                 if (p->p_fpdrainwait) {
3968                         p->p_fpdrainwait = 0;
3969                         needwakeup = 1;
3970                 }
3971         }
3972         if (!locked)
3973                 proc_fdunlock(p);
3974         if (needwakeup)
3975                 wakeup(&p->p_fpdrainwait);
3976
3977         return (0);
3978 }
3979
3980
3981 /*
3982  * file_vnode
3983  *
3984  * Description: Given an fd, look it up in the current process's per process
3985  *              open file table, and return its internal vnode pointer.
3986  *
3987  * Parameters:  fd                              fd to obtain vnode from
3988  *              vpp                             pointer to vnode return area
3989  *
3990  * Returns:     0                               Success
3991  *              EINVAL                          The fd does not refer to a
3992  *                                              vnode fileproc entry
3993  *      fp_lookup:EBADF                         Bad file descriptor
3994  *
3995  * Implicit returns:
3996  *              *vpp (modified)                 Returned vnode pointer
3997  *
3998  * Locks:       This function internally takes and drops the proc_fdlock for
3999  *              the current process
4000  *
4001  * Notes:       If successful, this function increments the f_iocount on the
4002  *              fd's corresponding fileproc.
4003  *
4004  *              The fileproc referenced is not returned; because of this, care
4005  *              must be taken to not drop the last reference (e.g. by closing
4006  *              the file).  This is inherently unsafe, since the reference may
4007  *              not be recoverable from the vnode, if there is a subsequent
4008  *              close that destroys the associate fileproc.  The caller should
4009  *              therefore retain their own reference on the fileproc so that
4010  *              the f_iocount can be dropped subsequently.  Failure to do this
4011  *              can result in the returned pointer immediately becoming invalid
4012  *              following the call.
4013  *
4014  *              Use of this function is discouraged.
4015  */
4016 int
4017 file_vnode(int fd, struct vnode **vpp)
4018 {
4019         proc_t p = current_proc();
4020         struct fileproc *fp;
4021         int error;
4022
4023         proc_fdlock_spin(p);
4024         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
4025                 proc_fdunlock(p);
4026                 return(error);
4027         }
4028         if (fp->f_type != DTYPE_VNODE) {
4029                 fp_drop(p, fd, fp,1);
4030                 proc_fdunlock(p);
4031                 return(EINVAL);
4032         }
4033         if (vpp != NULL)
4034                 *vpp = (struct vnode *)fp->f_data;
4035         proc_fdunlock(p);
4036
4037         return(0);
4038 }
4039
4040
4041 /*
4042  * file_vnode_withvid
4043  *
4044  * Description: Given an fd, look it up in the current process's per process
4045  *              open file table, and return its internal vnode pointer.
4046  *
4047  * Parameters:  fd                              fd to obtain vnode from
4048  *              vpp                             pointer to vnode return area
4049  *              vidp                            pointer to vid of the returned vnode
4050  *
4051  * Returns:     0                               Success
4052  *              EINVAL                          The fd does not refer to a
4053  *                                              vnode fileproc entry
4054  *      fp_lookup:EBADF                         Bad file descriptor
4055  *
4056  * Implicit returns:
4057  *              *vpp (modified)                 Returned vnode pointer
4058  *
4059  * Locks:       This function internally takes and drops the proc_fdlock for
4060  *              the current process
4061  *
4062  * Notes:       If successful, this function increments the f_iocount on the
4063  *              fd's corresponding fileproc.
4064  *
4065  *              The fileproc referenced is not returned; because of this, care
4066  *              must be taken to not drop the last reference (e.g. by closing
4067  *              the file).  This is inherently unsafe, since the reference may
4068  *              not be recoverable from the vnode, if there is a subsequent
4069  *              close that destroys the associate fileproc.  The caller should
4070  *              therefore retain their own reference on the fileproc so that
4071  *              the f_iocount can be dropped subsequently.  Failure to do this
4072  *              can result in the returned pointer immediately becoming invalid
4073  *              following the call.
4074  *
4075  *              Use of this function is discouraged.
4076  */
4077 int
4078 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
4079 {
4080         proc_t self = current_proc();
4081         struct fileproc *fproc;
4082         vnode_t vn;
4083         int rv;
4084
4085         proc_fdlock_spin(self);
4086         rv = fp_lookup(self, fd, &fproc, 1);
4087         if (rv != 0)
4088                 goto out;
4089         if (fproc->f_type != DTYPE_VNODE) {
4090                 /* Wrong descriptor type: return the reference just taken. */
4091                 fp_drop(self, fd, fproc, 1);
4092                 rv = EINVAL;
4093                 goto out;
4094         }
4095
4096         vn = (struct vnode *)fproc->f_data;
4097         if (vpp != NULL)
4098                 *vpp = vn;
4099         /* The vid is only reported when there is a vnode to read it from. */
4100         if (vn != NULLVP && vidp != NULL)
4101                 *vidp = (uint32_t)vn->v_id;
4102 out:
4103         proc_fdunlock(self);
4104         return (rv);
4105 }
4106
4107
4108 /*
4109  * file_socket
4110  *
4111  * Description: Given an fd, look it up in the current process's per process
4112  *              open file table, and return its internal socket pointer.
4113  *
4114  * Parameters:  fd                              fd to obtain socket from
4115  *              sp                              pointer to socket return area
4116  *
4117  * Returns:     0                               Success
4118  *              ENOTSOCK                        Not a socket
4119  *              fp_lookup:EBADF                 Bad file descriptor
4120  *
4121  * Implicit returns:
4122  *              *sp (modified)                  Returned socket pointer
4123  *
4124  * Locks:       This function internally takes and drops the proc_fdlock for
4125  *              the current process
4126  *
4127  * Notes:       If successful, this function increments the f_iocount on the
4128  *              fd's corresponding fileproc.
4129  *
4130  *              The fileproc referenced is not returned; because of this, care
4131  *              must be taken to not drop the last reference (e.g. by closing
4132  *              the file).  This is inherently unsafe, since the reference may
4133  *              not be recoverable from the socket, if there is a subsequent
4134  *              close that destroys the associate fileproc.  The caller should
4135  *              therefore retain their own reference on the fileproc so that
4136  *              the f_iocount can be dropped subsequently.  Failure to do this
4137  *              can result in the returned pointer immediately becoming invalid
4138  *              following the call.
4139  *
4140  *              Use of this function is discouraged.
4141  */
4142 int
4143 file_socket(int fd, struct socket **sp)
4144 {
4145         proc_t p = current_proc();
4146         struct fileproc *fp;
4147         int error;
4148
4149         proc_fdlock_spin(p);
4150         error = fp_lookup(p, fd, &fp, 1);
4151         if (error == 0 && fp->f_type != DTYPE_SOCKET) {
4152                 /* Not a socket: give back the reference fp_lookup() took. */
4153                 fp_drop(p, fd, fp, 1);
4154                 error = ENOTSOCK;
4155         } else if (error == 0 && sp != NULL) {
4156                 /* A NULL return area is tolerated, as file_vnode() does. */
4157                 *sp = (struct socket *)fp->f_data;
4158         }
4159         /* On success the f_iocount reference is kept; see file_drop(). */
4160         proc_fdunlock(p);
4161
4162         return (error);
4163 }
4164
4165
4166 /*
4167  * file_flags
4168  *
4169  * Description: Given an fd, look it up in the current process's per process
4170  *              open file table, and return its fileproc's flags field.
4171  *
4172  * Parameters:  fd                              fd whose flags are to be
4173  *                                              retrieved
4174  *              flags                           pointer to flags data area
4175  *
4176  * Returns:     0                               Success; the type of the
4177  *                                              descriptor is not checked
4178  *              fp_lookup:EBADF                 Bad file descriptor
4179  *
4180  * Implicit returns:
4181  *              *flags (modified)               Returned flags field
4182  *
4183  * Locks:       This function internally takes and drops the proc_fdlock for
4184  *              the current process
4185  *
4186  * Notes:       This function will internally increment and decrement the
4187  *              f_iocount of the fileproc as part of its operation.
4188  */
4189 int
4190 file_flags(int fd, int *flags)
4191 {
4192         proc_t self = current_proc();
4193         struct fileproc *fproc;
4194         int rv;
4195
4196         proc_fdlock_spin(self);
4197         rv = fp_lookup(self, fd, &fproc, 1);
4198         if (rv == 0) {
4199                 /*
4200                  * Snapshot f_flag, then drop the reference fp_lookup() took.
4201                  */
4202                 *flags = (int)fproc->f_flag;
4203                 fp_drop(self, fd, fproc, 1);
4204         }
4205         proc_fdunlock(self);
4206         return (rv);
4207 }
4208
4209
4210 /*
4211  * file_drop
4212  *
4213  * Description: Drop an iocount reference on an fd, and wake up any waiters
4214  *              for draining (i.e. blocked in fileproc_drain() called during
4215  *              the last attempt to close a file).
4216  *
4217  * Parameters:  fd                              fd on which an ioreference is
4218  *                                              to be dropped
4219  *
4220  * Returns:     0                               Success
4221  *              EBADF                           Bad file descriptor
4222  *
4223  * Description: Given an fd, look it up in the current process's per process
4224  *              open file table, and drop its fileproc's f_iocount by one
4225  *
4226  * Notes:       This is intended as a corresponding operation to the functions
4227  *              file_vnode() and file_socket() operations.
4228  *
4229  *              Technically, the close reference is supposed to be protected
4230  *              by a fileproc_drain(), however, a drain will only block if
4231  *              the fd refers to a character device, and that device has had
4232  *              preparefileread() called on it.  If it refers to something
4233  *              other than a character device, then the drain will occur and
4234  *              block each close attempt, rather than merely the last close.
4235  *
4236  *              Since it's possible for an fd that refers to a character
4237  *              device to have an intermediate close followed by an open to
4238  *              cause a different file to correspond to that descriptor,
4239  *              unless there was a cautionary reference taken on the fileproc,
4240  *              this is an inherently unsafe function.  This happens in the
4241  *              case where multiple fd's in a process refer to the same
4242  *              character device (e.g. stdin/out/err pointing to a tty, etc.).
4243  *
4244  *              Use of this function is discouraged.
4245  */
4246 int
4247 file_drop(int fd)
4248 {
4249         proc_t self = current_proc();
4250         struct filedesc *fdp;
4251         struct fileproc *fproc = NULL;
4252         int wake = 0;
4253
4254         proc_fdlock_spin(self);
4255         fdp = self->p_fd;
4256         if (fd >= 0 && fd < fdp->fd_nfiles)
4257                 fproc = fdp->fd_ofiles[fd];
4258         /* A reserved slot is only acceptable while it is being closed. */
4259         if (fproc == NULL ||
4260             ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4261              !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
4262                 proc_fdunlock(self);
4263                 return (EBADF);
4264         }
4265         if (--fproc->f_iocount == 0) {
4266                 if (fproc->f_flags & FP_SELCONFLICT)
4267                         fproc->f_flags &= ~FP_SELCONFLICT;
4268                 wake = (self->p_fpdrainwait != 0);
4269                 if (wake)
4270                         self->p_fpdrainwait = 0;
4271         }
4272         proc_fdunlock(self);
4273         /* The wakeup itself is issued after the lock has been dropped. */
4274         if (wake)
4275                 wakeup(&self->p_fpdrainwait);
4276         return (0);
4277 }
4278
4279
4280 static int falloc_withalloc_locked(proc_t, struct fileproc **, int *, /* p, resultfp, resultfd */
4281     vfs_context_t, struct fileproc * (*)(void *), void *, int); /* ctx, fp_zalloc, crarg, locked */
4282
4283 /*
4284  * falloc
4285  *
4286  * Description: Allocate an entry in the per process open file table and
4287  *              return the corresponding fileproc and fd.
4288  *
4289  * Parameters:  p                               The process in whose open file
4290  *                                              table the fd is to be allocated
4291  *              resultfp                        Pointer to fileproc pointer
4292  *                                              return area
4293  *              resultfd                        Pointer to fd return area
4294  *              ctx                             VFS context
4295  *
4296  * Returns:     0                               Success
4297  *      falloc:ENFILE                           Too many open files in system
4298  *      falloc:EMFILE                           Too many open files in process
4299  *      falloc:ENOMEM                           M_FILEPROC or M_FILEGLOB zone
4300  *                                              exhausted
4301  *
4302  * Implicit returns:
4303  *              *resultfp (modified)            Returned fileproc pointer
4304  *              *resultfd (modified)            Returned fd
4305  *
4306  * Locks:       This function takes and drops the proc_fdlock; if this lock
4307  *              is already held, use falloc_locked() instead.
4308  *
4309  * Notes:       This function takes separate process and context arguments
4310  *              solely to support kern_exec.c; otherwise, it would take
4311  *              neither, and expect falloc_locked() to use the
4312  *              vfs_context_current() routine internally.
4313  */
4314 int
4315 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4316 {
4317         /* Delegate with fileproc_alloc_init as allocator and no allocator argument. */
4318         return (falloc_withalloc(p, resultfp, resultfd, ctx, fileproc_alloc_init, NULL));
4319 }
4320
4321 /*
4322  * Like falloc, but including the fileproc allocator and create-args
4323  */
4324 int
4325 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4326     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg)
4327 {
4328         int rv;
4329
4330         /* Take proc_fdlock here and tell the worker it is held (locked == 1). */
4331         proc_fdlock(p);
4332         rv = falloc_withalloc_locked(p, resultfp, resultfd, ctx, fp_zalloc, arg, 1);
4333         proc_fdunlock(p);
4334
4335         return (rv);
4336 }
4337
4338 /*
4339  * "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists
4340  */
4341 static const struct fileops uninitops; /* zero-filled; falloc_withalloc_locked() points each new fileglob's fg_ops here */
4342
4343 /*
4344  * falloc_locked
4345  *
4346  * Create a new open file structure and allocate
4347  * a file descriptor for the process that refers to it.
4348  *
4349  * Returns:     0                       Success
4350  *
4351  * Description: Allocate an entry in the per process open file table and
4352  *              return the corresponding fileproc and fd.
4353  *
4354  * Parameters:  p                               The process in whose open file
4355  *                                              table the fd is to be allocated
4356  *              resultfp                        Pointer to fileproc pointer
4357  *                                              return area
4358  *              resultfd                        Pointer to fd return area
4359  *              ctx                             VFS context
4360  *              locked                          Flag to indicate whether the
4361  *                                              caller holds proc_fdlock
4362  *
4363  * Returns:     0                               Success
4364  *              ENFILE                          Too many open files in system
4365  *              fdalloc:EMFILE                  Too many open files in process
4366  *              ENOMEM                          M_FILEPROC or M_FILEGLOB zone
4367  *                                              exhausted
4368  *      fdalloc:ENOMEM
4369  *
4370  * Implicit returns:
4371  *              *resultfp (modified)            Returned fileproc pointer
4372  *              *resultfd (modified)            Returned fd
4373  *
4374  * Locks:       If the parameter 'locked' is zero, this function takes and
4375  *              drops the proc_fdlock; if non-zero, the caller must hold the
4376  *              lock.
4377  *
4378  * Notes:       If you intend to use a zero 'locked' parameter, use the
4379  *              utility function falloc() instead.
4380  *
4381  *              This function takes separate process and context arguments
4382  *              solely to support kern_exec.c; otherwise, it would take
4383  *              neither, and use the vfs_context_current() routine internally.
4384  */
4385 int
4386 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4387     vfs_context_t ctx, int locked)
4388 {
4389         return (falloc_withalloc_locked(p, resultfp, resultfd, ctx,
4390             fileproc_alloc_init /* allocator */, NULL /* its argument */, locked));
4391 }
4392
4393 /*
4394  * Common worker behind falloc(), falloc_withalloc() and falloc_locked();
4395  * see falloc_locked() above for parameters, return values and locking.
4396  */
4397 static int
4398 falloc_withalloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4399     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg, int locked)
4400 {
4401         struct fileproc *fp;
4402         struct fileglob *fg;
4403         int error, nfd;
4404
4405         if (!locked)
4406                 proc_fdlock(p);
4407         /*
4408          * Do the checks that need no descriptor before calling fdalloc().  The
4409          * slot it returns is used again after the unlock below, so it is
4410          * presumably reserved; failing afterwards would strand that slot.
4411          */
4412         if (nfiles >= maxfiles) {
4413                 if (!locked)
4414                         proc_fdunlock(p);
4415                 tablefull("file");
4416                 return (ENFILE);
4417         }
4418 #if CONFIG_MACF
4419         error = mac_file_check_create(proc_ucred(p));
4420         if (error) {
4421                 if (!locked)
4422                         proc_fdunlock(p);
4423                 return (error);
4424         }
4425 #endif
4426         if ( (error = fdalloc(p, 0, &nfd)) ) {
4427                 if (!locked)
4428                         proc_fdunlock(p);
4429                 return (error);
4430         }
4431
4432         /*
4433          * The allocations are made with the lock dropped (M_WAITOK below).
4434          * NOTE(review): the ENOMEM returns still leave nfd claimed; undoing
4435          * that needs a helper that is not visible in this part of the file.
4436          */
4437         proc_fdunlock(p);
4438         fp = (*fp_zalloc)(crarg);
4439         if (fp == NULL) {
4440                 if (locked)     /* give the lock back in the state the caller expects */
4441                         proc_fdlock(p);
4442                 return (ENOMEM);
4443         }
4444         MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4445         if (fg == NULL) {
4446                 fileproc_free(fp);
4447                 if (locked)
4448                         proc_fdlock(p);
4449                 return (ENOMEM);
4450         }
4451         bzero(fg, sizeof(struct fileglob));
4452         lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4453
4454         /* Start with one reference on each object and the placeholder ops. */
4455         fp->f_iocount = 1;
4456         fg->fg_count = 1;
4457         fg->fg_ops = &uninitops;
4458         fp->f_fglob = fg;
4459 #if CONFIG_MACF
4460         mac_file_label_init(fg);
4461 #endif
4462         kauth_cred_ref(ctx->vc_ucred);
4463
4464         proc_fdlock(p);
4465         fp->f_cred = ctx->vc_ucred;
4466 #if CONFIG_MACF
4467         mac_file_label_associate(fp->f_cred, fg);
4468 #endif
4469         OSAddAtomic(1, &nfiles);
4470         /* Install the new fileproc in the slot obtained from fdalloc(). */
4471         p->p_fd->fd_ofiles[nfd] = fp;
4472         if (!locked)
4473                 proc_fdunlock(p);
4474         if (resultfp)
4475                 *resultfp = fp;
4476         if (resultfd)
4477                 *resultfd = nfd;
4478         return (0);
4479 }
4480
4481
4482 /*
4483  * fg_free
4484  *
4485  * Description: Free a file structure; drop the global open file count, and
4486  *              drop the credential reference, if the fileglob has one, and
4487  *              destroy the instance mutex before freeing
4488  *
4489  * Parameters:  fg                              Pointer to fileglob to be
4490  *                                              freed
4491  *
4492  * Returns:     void
4493  */
4494 void
4495 fg_free(struct fileglob *fg)
4496 {
4497         /* One fewer entry in the global open file count. */
4498         OSAddAtomic(-1, &nfiles);
4499         /* Free the attached fg_vn_data, if there is any. */
4500         if (fg->fg_vn_data != NULL) {
4501                 fg_vn_data_free(fg->fg_vn_data);
4502                 fg->fg_vn_data = NULL;
4503         }
4504         /* Drop the credential reference only when a valid one is held. */
4505         if (IS_VALID_CRED(fg->fg_cred)) {
4506                 kauth_cred_unref(&fg->fg_cred);
4507         }
4508         lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4509 #if CONFIG_MACF
4510         mac_file_label_destroy(fg);
4511 #endif
4512         FREE_ZONE(fg, sizeof(struct fileglob), M_FILEGLOB);
4513 }
4514
4515
4516 /*
4517  * fdexec
4518  *
4519  * Description: Perform close-on-exec processing for all files in a process
4520  *              that are either marked as close-on-exec, or which were in the
4521  *              process of being opened at the time of the execve
4522  *
4523  *              Also handles the case (via posix_spawn()) where -all-
4524  *              files except those marked with "inherit" as treated as
4525  *              close-on-exec.
4526  *
4527  * Parameters:  p                               Pointer to process calling
4528  *                                              execve
4529  *
4530  * Returns:     void
4531  *
4532  * Locks:       This function internally takes and drops proc_fdlock()
4533  *
4534  */
4535 void
4536 fdexec(proc_t p, short flags)
4537 {
4538         struct filedesc *fdp = p->p_fd;
4539         boolean_t close_by_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4540         int fd;
4541
4542         proc_fdlock(p);
4543         for (fd = fdp->fd_lastfile; fd >= 0; fd--) {
4544                 struct fileproc *fp = fdp->fd_ofiles[fd];
4545                 char *uflags = &fdp->fd_ofileflags[fd];
4546                 int must_close;
4547
4548                 if (fp != NULL && close_by_default) {
4549                         /*
4550                          * Inverted inheritance: every open descriptor becomes
4551                          * close-on-exec unless it is marked "inherit" and is
4552                          * not already close-on-exec.  The "inherit" mark is
4553                          * cleared either way.
4554                          */
4555                         if ((*uflags & (UF_EXCLOSE|UF_INHERIT)) != UF_INHERIT)
4556                                 *uflags |= UF_EXCLOSE;
4557                         *uflags &= ~UF_INHERIT;
4558                 }
4559
4560                 /* Close-on-exec slots that are not reserved get closed ... */
4561                 must_close = ((*uflags & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE);
4562 #if CONFIG_MACF
4563                 /* ... as does any file mac_file_check_inherit() rejects. */
4564                 if (!must_close && fp != NULL &&
4565                     mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4566                         must_close = 1;
4567 #endif
4568                 if (!must_close)
4569                         continue;
4570
4571                 if (fd < fdp->fd_knlistsize)
4572                         knote_fdclose(p, fd);
4573                 procfdtbl_clearfd(p, fd);
4574                 if (fd == fdp->fd_lastfile && fd > 0)
4575                         fdp->fd_lastfile--;
4576                 if (fd < fdp->fd_freefile)
4577                         fdp->fd_freefile = fd;
4578                 /*
4579                  * Wait for any third party viewers (e.g., lsof)
4580                  * to release their references to this fileproc.
4581                  */
4582                 while (fp->f_iocount > 0) {
4583                         p->p_fpdrainwait = 1;
4584                         msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
4585                 }
4586                 closef_locked(fp, fp->f_fglob, p);
4587                 fileproc_free(fp);
4588         }
4589         proc_fdunlock(p);
4590 }
4591
4592
4593 /*
4594  * fdcopy
4595  *
4596  * Description: Copy a filedesc structure.  This is normally used as part of
4597  *              forkproc() when forking a new process, to copy the per process
4598  *              open file table over to the new process.
4599  *
4600  * Parameters:  p                               Process whose open file table
4601  *                                              is to be copied (parent)
4602  *              uth_cdir                        Per thread current working
4603  *                                              directory, or NULL
4604  *
4605  * Returns:     NULL                            Copy failed
4606  *              !NULL                           Pointer to new struct filedesc
4607  *
4608  * Locks:       This function internally takes and drops proc_fdlock()
4609  *
4610  * Notes:       Files are copied directly, ignoring the new resource limits
4611  *              for the process that's being copied into.  Since the descriptor
4612  *              references are just additional references, this does not count
4613  *              against the number of open files on the system.
4614  *
4615  *              The struct filedesc includes the current working directory,
4616  *              and the current root directory, if the process is chroot'ed.
4617  *
4618  *              If the exec was called by a thread using a per thread current
4619  *              working directory, we inherit the working directory from the
4620  *              thread making the call, rather than from the process.
4621  *
4622  *              In the case of a failure to obtain a reference, for most cases,
4623  *              the file entry will be silently dropped.  There's an exception
4624  *              for the case of a chroot dir, since a failure to obtain a
4625  *              reference there would constitute an "escape" from the chroot
4626  *              environment, which must not be allowed.  In that case, we will
4627  *              deny the execve() operation, rather than allowing the escape.
4628  */
4629 struct filedesc *
4630 fdcopy(proc_t p, vnode_t uth_cdir)
4631 {
4632         struct filedesc *newfdp, *fdp = p->p_fd;
4633         int i;
4634         struct fileproc *ofp, *fp;
4635         vnode_t v_dir;
4636
4637         MALLOC_ZONE(newfdp, struct filedesc *,
4638                         sizeof(*newfdp), M_FILEDESC, M_WAITOK);
4639         if (newfdp == NULL)
4640                 return(NULL);
4641
4642         proc_fdlock(p);
4643
4644         /*
4645          * the FD_CHROOT flag will be inherited via this copy
4646          */
4647         (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4648
4649         /*
4650          * If we are running with per-thread current working directories,
4651          * inherit the new current working directory from the current thread
4652          * instead, before we take our references.
4653          */
4654         if (uth_cdir != NULLVP)
4655                 newfdp->fd_cdir = uth_cdir;
4656
4657         /*
4658          * For both fd_cdir and fd_rdir make sure we get
4659          * a valid reference... if we can't, then set
4660          * the pointer(s) to NULL in the child... this
4661          * will keep us from using a non-referenced vp
4662          * and allows us to do the vnode_rele only on
4663          * a properly referenced vp
4664          */
4665         if ( (v_dir = newfdp->fd_cdir) ) {
4666                 if (vnode_getwithref(v_dir) == 0) {
4667                         if ( (vnode_ref(v_dir)) )
4668                                 newfdp->fd_cdir = NULL;
4669                         vnode_put(v_dir);
4670                 } else
4671                         newfdp->fd_cdir = NULL;
4672         }
4673         if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4674                 /*
4675                  * we couldn't get a new reference on
4676                  * the current working directory being
4677                  * inherited... we might as well drop
4678                  * our reference from the parent also
4679                  * since the vnode has gone DEAD making
4680                  * it useless... by dropping it we'll
4681                  * be that much closer to recycling it
4682                  */
4683                 vnode_rele(fdp->fd_cdir);
4684                 fdp->fd_cdir = NULL;
4685         }
4686
4687         if ( (v_dir = newfdp->fd_rdir) ) {
4688                 if (vnode_getwithref(v_dir) == 0) {
4689                         if ( (vnode_ref(v_dir)) )
4690                                 newfdp->fd_rdir = NULL;
4691                         vnode_put(v_dir);
4692                 } else {
4693                         newfdp->fd_rdir = NULL;
4694                 }
4695         }
4696         /* Coming from a chroot environment and unable to get a reference... */
4697         if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4698                 /*
4699                  * We couldn't get a new reference on the chroot
4700                  * directory being inherited... this is fatal, since
4701                  * otherwise it would constitute an escape from a
4702                  * chroot environment by the new process.  Release the
4703                  * fd lock taken above before bailing out.
4704                  */
4705                 proc_fdunlock(p);
4706                 if (newfdp->fd_cdir)
4707                         vnode_rele(newfdp->fd_cdir);
4708                 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
4709                 return(NULL);
4710         }
4711
4712         /*
4713          * If the number of open files fits in the internal arrays
4714          * of the open file structure, use them, otherwise allocate
4715          * additional memory for the number of descriptors currently
4716          * in use.
4717          */
4718         if (newfdp->fd_lastfile < NDFILE)
4719                 i = NDFILE;
4720         else {
4721                 /*
4722                  * Compute the smallest multiple of NDEXTENT needed
4723                  * for the file descriptors currently in use,
4724                  * allowing the table to shrink.
4725                  */
4726                 i = newfdp->fd_nfiles;
4727                 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
4728                         i /= 2;
4729         }
4730         proc_fdunlock(p);
4731         /* The child's table is allocated with the fd lock dropped. */
4732         MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
4733                                 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4734         if (newfdp->fd_ofiles == NULL) {
4735                 if (newfdp->fd_cdir)
4736                         vnode_rele(newfdp->fd_cdir);
4737                 if (newfdp->fd_rdir)
4738                         vnode_rele(newfdp->fd_rdir);
4739
4740                 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
4741                 return(NULL);
4742         }
4743         (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4744         proc_fdlock(p);
4745         /* The per-fd flag bytes live right after the i fileproc pointers. */
4746         newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4747         newfdp->fd_nfiles = i;
4748
4749         if (fdp->fd_nfiles > 0) {
4750                 struct fileproc **fpp;
4751                 char *flags;
4752
4753                 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4754                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4755                 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4756                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4757
4758                 /*
4759                  * kq descriptors cannot be copied.
4760                  */
4761                 if (newfdp->fd_knlistsize != -1) {
4762                         fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4763                         flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4764                         for (i = newfdp->fd_lastfile;
4765                             i >= 0; i--, fpp--, flags--) {
4766                                 if (*flags & UF_RESERVED)
4767                                         continue;       /* (removed below) */
4768                                 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
4769                                         *fpp = NULL;
4770                                         *flags = 0;
4771                                         if (i < newfdp->fd_freefile)
4772                                                 newfdp->fd_freefile = i;
4773                                 }
4774                                 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
4775                                         newfdp->fd_lastfile--;
4776                         }
4777                         newfdp->fd_knlist = NULL;
4778                         newfdp->fd_knlistsize = -1;
4779                         newfdp->fd_knhash = NULL;
4780                         newfdp->fd_knhashmask = 0;
4781                 }
4782                 fpp = newfdp->fd_ofiles;
4783                 flags = newfdp->fd_ofileflags;
4784                 /* Walk upward so that i is the index of *fpp when fd_freefile is lowered. */
4785                 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++, flags++)
4786                         if ((ofp = *fpp) != NULL &&
4787                             0 == (*flags & (UF_FORKCLOSE|UF_RESERVED))) {
4788 #if DEBUG
4789                                 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE)
4790                                         panic("complex fileproc");
4791 #endif
4792                                 fp = fileproc_alloc_init(NULL);
4793                                 if (fp == NULL) {
4794                                         /* XXX no room to copy: leave the slot fully empty */
4795                                         if (i < newfdp->fd_freefile)
4796                                                 newfdp->fd_freefile = i;
4797                                         *fpp = NULL;
4798                                         *flags = 0;
4799                                 } else {
4800                                         fp->f_flags |=
4801                                             (ofp->f_flags & ~FP_TYPEMASK);
4802                                         fp->f_fglob = ofp->f_fglob;
4803                                         (void)fg_ref(fp);
4804                                         *fpp = fp;
4805                                 }
4806                         } else {
4807                                 if (i < newfdp->fd_freefile)
4808                                         newfdp->fd_freefile = i;
4809                                 *fpp = NULL;
4810                                 *flags = 0;
4811                         }
4812         }
4813
4814         proc_fdunlock(p);
4815         return (newfdp);
4816 }
4817
4818
4819 /*
4820  * fdfree
4821  *
4822  * Description: Release a filedesc (per process open file table) structure;
4823  *              this is done on process exit(), or from forkproc_free() if
4824  *              the fork fails for some reason subsequent to a successful
4825  *              call to fdcopy()
4826  *
4827  * Parameters:  p                               Pointer to process going away
4828  *
4829  * Returns:     void
4830  *
4831  * Locks:       This function internally takes and drops proc_fdlock()
4832  */
4833 void
4834 fdfree(proc_t p)
4835 {
4836         struct filedesc *fdp;
4837         struct fileproc *fp;
4838         int i;
4839
4840         proc_fdlock(p);
4841
4842         if (p == kernproc || NULL == (fdp = p->p_fd)) {
4843                 proc_fdunlock(p);
4844                 return;
4845         }
4846
4847         extern struct filedesc filedesc0;
4848
4849         if (&filedesc0 == fdp)
4850                 panic("filedesc0");
4851
4852         if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
4853                 for (i = fdp->fd_lastfile; i >= 0; i--) {
4854                         if ((fp = fdp->fd_ofiles[i]) != NULL) {
4855
4856                           if (fdp->fd_ofileflags[i] & UF_RESERVED)
4857                                 panic("fdfree: found fp with UF_RESERVED");
4858
4859                                 procfdtbl_reservefd(p, i);
4860
4861                                 if (i < fdp->fd_knlistsize)
4862                                         knote_fdclose(p, i);
4863                                 if (fp->f_flags & FP_WAITEVENT)
4864                                         (void)waitevent_close(p, fp);
4865                                 (void) closef_locked(fp, fp->f_fglob, p);
4866                                 fileproc_free(fp);
4867                         }
4868                 }
4869                 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
4870                 fdp->fd_ofiles = NULL;
4871                 fdp->fd_nfiles = 0;
4872         }
4873
4874         proc_fdunlock(p);
4875
4876         if (fdp->fd_cdir)
4877                 vnode_rele(fdp->fd_cdir);
4878         if (fdp->fd_rdir)
4879                 vnode_rele(fdp->fd_rdir);
4880
4881         proc_fdlock_spin(p);
4882         p->p_fd = NULL;
4883         proc_fdunlock(p);
4884
4885         if (fdp->fd_knlist)
4886                 FREE(fdp->fd_knlist, M_KQUEUE);
4887         if (fdp->fd_knhash)
4888                 FREE(fdp->fd_knhash, M_KQUEUE);
4889
4890         FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
4891 }
4892
4893 /*
4894  * closef_locked
4895  *
4896  * Description: Internal form of closef; called with proc_fdlock held
4897  *
4898  * Parameters:  fp                      Pointer to fileproc for fd
4899  *              fg                      Pointer to fileglob for fd
4900  *              p                       Pointer to proc structure
4901  *
4902  * Returns:     0                       Success
4903  *      closef_finish:???               Anything returnable by a per-fileops
4904  *                                      close function
4905  *
4906  * Note:        Decrements reference count on file structure; if this was the
4907  *              last reference, then closef_finish() is called
4908  *
4909  *              p and fp are allowed to  be NULL when closing a file that was
4910  *              being passed in a message (but only if we are called when this
4911  *              is NOT the last reference).
4912  */
4913 int
4914 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
4915 {
4916         struct vnode *vp;
4917         struct flock lf;
4918         struct vfs_context context;
4919         int error;
4920
4921         if (fg == NULL) {
4922                 return (0);
4923         }
4924
4925         /* Set up context with cred stashed in fg */
4926         if (p == current_proc())
4927                 context.vc_thread = current_thread();
4928         else
4929                 context.vc_thread = NULL;
4930         context.vc_ucred = fg->fg_cred;
4931
4932         /*
4933          * POSIX record locking dictates that any close releases ALL
4934          * locks owned by this process.  This is handled by setting
4935          * a flag in the unlock to free ONLY locks obeying POSIX
4936          * semantics, and not to free BSD-style file locks.
4937          * If the descriptor was in a message, POSIX-style locks
4938          * aren't passed with the descriptor.
4939          */
4940         if (p && (p->p_ladvflag & P_LADVLOCK) &&
4941             DTYPE_VNODE == FILEGLOB_DTYPE(fg)) {
4942                 proc_fdunlock(p);
4943
4944                 lf.l_whence = SEEK_SET;
4945                 lf.l_start = 0;
4946                 lf.l_len = 0;
4947                 lf.l_type = F_UNLCK;
4948                 vp = (struct vnode *)fg->fg_data;
4949
4950                 if ( (error = vnode_getwithref(vp)) == 0 ) {
4951                         (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
4952                         (void)vnode_put(vp);
4953                 }
4954                 proc_fdlock(p);
4955         }
4956         lck_mtx_lock_spin(&fg->fg_lock);
4957         fg->fg_count--;
4958
4959         if (fg->fg_count > 0) {
4960                 lck_mtx_unlock(&fg->fg_lock);
4961                 return (0);
4962         }
4963 #if DIAGNOSTIC
4964         if (fg->fg_count != 0)
4965                 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
4966 #endif
4967
4968         if (fp && (fp->f_flags & FP_WRITTEN))
4969                 fg->fg_flag |= FWASWRITTEN;
4970
4971         fg->fg_lflags |= FG_TERM;
4972         lck_mtx_unlock(&fg->fg_lock);
4973
4974         if (p)
4975                 proc_fdunlock(p);
4976
4977         /* Since we ensure that fg->fg_ops is always initialized,
4978          * it is safe to invoke fo_close on the fg */
4979         error = fo_close(fg, &context);
4980
4981         fg_free(fg);
4982
4983         if (p)
4984                 proc_fdlock(p);
4985
4986         return(error);
4987 }
4988
4989
4990 /*
4991  * fileproc_drain
4992  *
4993  * Description: Drain out pending I/O operations
4994  *
4995  * Parameters:  p                               Process closing this file
4996  *              fp                              fileproc struct for the open
4997  *                                              instance on the file
4998  *
4999  * Returns:     void
5000  *
5001  * Locks:       Assumes the caller holds the proc_fdlock
5002  *
5003  * Notes:       For character devices, this occurs on the last close of the
5004  *              device; for all other file descriptors, this occurs on each
5005  *              close to prevent fd's from being closed out from under
5006  *              operations currently in progress and blocked
5007  *
5008  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
5009  *              regarding their use and interaction with this function.
5010  */
5011 void
5012 fileproc_drain(proc_t p, struct fileproc * fp)
5013 {
5014         struct vfs_context context;
5015
5016         context.vc_thread = proc_thread(p);     /* XXX */
5017         context.vc_ucred = fp->f_fglob->fg_cred;
5018
5019         fp->f_iocount-- ; /* (the one the close holds) */
5020
5021         while (fp->f_iocount) {
5022
5023                 lck_mtx_convert_spin(&p->p_fdmlock);
5024
5025                 if (fp->f_fglob->fg_ops->fo_drain) {
5026                         (*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
5027                 }
5028                 if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
5029                         if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
5030                                 panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr);
5031                 }
5032                 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5033                         if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
5034                                 panic("bad select_conflict_queue");
5035                 }
5036                 p->p_fpdrainwait = 1;
5037
5038                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5039
5040         }
5041 #if DIAGNOSTIC
5042         if ((fp->f_flags & FP_INSELECT) != 0)
5043                 panic("FP_INSELECT set on drained fp");
5044 #endif
5045         if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
5046                 fp->f_flags &= ~FP_SELCONFLICT;
5047 }
5048
5049
5050 /*
5051  * fp_free
5052  *
5053  * Description: Release the fd and free the fileproc associated with the fd
5054  *              in the per process open file table of the specified process;
5055  *              these values must correspond.
5056  *
5057  * Parameters:  p                               Process containing fd
5058  *              fd                              fd to be released
5059  *              fp                              fileproc to be freed
5060  *
5061  * Returns:     0                               Success
5062  *
5063  * Notes:       XXX function should be void - no one interprets the returns
5064  *              XXX code
5065  */
5066 int
5067 fp_free(proc_t p, int fd, struct fileproc * fp)
5068 {
5069         proc_fdlock_spin(p);
5070         fdrelse(p, fd);
5071         proc_fdunlock(p);
5072
5073         fg_free(fp->f_fglob);
5074         fileproc_free(fp);
5075         return(0);
5076 }
5077
5078
5079 /*
5080  * flock
5081  *
5082  * Description: Apply an advisory lock on a file descriptor.
5083  *
5084  * Parameters:  p                               Process making request
5085  *              uap->fd                         fd on which the lock is to be
5086  *                                              attempted
5087  *              uap->how                        (Un)Lock bits, including type
5088  *              retval                          Pointer to the call return area
5089  *
5090  * Returns:     0                               Success
5091  *      fp_getfvp:EBADF                         Bad file descriptor
5092  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5093  *      vnode_getwithref:???
5094  *      VNOP_ADVLOCK:???
5095  *
5096  * Implicit returns:
5097  *              *retval (modified)              Size of dtable
5098  *
5099  * Notes:       Just attempt to get a record lock of the requested type on
5100  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5101  */
5102 int
5103 flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5104 {
5105         int fd = uap->fd;
5106         int how = uap->how;
5107         struct fileproc *fp;
5108         struct vnode *vp;
5109         struct flock lf;
5110         vfs_context_t ctx = vfs_context_current();
5111         int error=0;
5112
5113         AUDIT_ARG(fd, uap->fd);
5114         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5115                 return(error);
5116         }
5117         if ( (error = vnode_getwithref(vp)) ) {
5118                 goto out1;
5119         }
5120         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5121
5122         lf.l_whence = SEEK_SET;
5123         lf.l_start = 0;
5124         lf.l_len = 0;
5125         if (how & LOCK_UN) {
5126                 lf.l_type = F_UNLCK;
5127                 fp->f_flag &= ~FHASLOCK;
5128                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5129                 goto out;
5130         }
5131         if (how & LOCK_EX)
5132                 lf.l_type = F_WRLCK;
5133         else if (how & LOCK_SH)
5134                 lf.l_type = F_RDLCK;
5135         else {
5136                 error = EBADF;
5137                 goto out;
5138         }
5139 #if CONFIG_MACF
5140         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
5141         if (error)
5142                 goto out;
5143 #endif
5144         fp->f_flag |= FHASLOCK;
5145         if (how & LOCK_NB) {
5146                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx, NULL);
5147                 goto out;
5148         }
5149         error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx, NULL);
5150 out:
5151         (void)vnode_put(vp);
5152 out1:
5153         fp_drop(p, fd, fp, 0);
5154         return(error);
5155
5156 }
5157
5158 /*
5159  * fileport_makeport
5160  *
5161  * Description: Obtain a Mach send right for a given file descriptor.
5162  *
5163  * Parameters:  p               Process calling fileport
5164  *              uap->fd         The fd to reference
5165  *              uap->portnamep  User address at which to place port name.
5166  *
5167  * Returns:     0               Success.
5168  *              EBADF           Bad file descriptor.
5169  *              EINVAL          File descriptor had type that cannot be sent, misc. other errors.
5170  *              EFAULT          Address at which to store port name is not valid.
5171  *              EAGAIN          Resource shortage.
5172  *
5173  * Implicit returns:
5174  *              On success, name of send right is stored at user-specified address.
5175  */
5176 int
5177 fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5178     __unused int *retval)
5179 {
5180         int err;
5181         int fd = uap->fd;
5182         user_addr_t user_portaddr = uap->portnamep;
5183         struct fileproc *fp = FILEPROC_NULL;
5184         struct fileglob *fg = NULL;
5185         ipc_port_t fileport;
5186         mach_port_name_t name = MACH_PORT_NULL;
5187
5188         err = fp_lookup(p, fd, &fp, 0);
5189         if (err != 0) {
5190                 goto out;
5191         }
5192
5193         if (!filetype_issendable(fp->f_type)) {
5194                 err = EINVAL;
5195                 goto out;
5196         }
5197
5198         if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5199                 proc_fdlock(p);
5200                 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5201                 proc_fdunlock(p);
5202                 goto out;
5203         }
5204
5205         /* Dropped when port is deallocated */
5206         fg = fp->f_fglob;
5207         fg_ref(fp);
5208
5209         /* Allocate and initialize a port */
5210         fileport = fileport_alloc(fg);
5211         if (fileport == IPC_PORT_NULL) {
5212                 err = EAGAIN;
5213                 fg_drop(fp);
5214                 goto out;
5215         }
5216
5217         /* Add an entry.  Deallocates port on failure. */
5218         name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5219         if (!MACH_PORT_VALID(name)) {
5220                 err = EINVAL;
5221                 goto out;
5222         }
5223
5224         err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5225         if (err != 0) {
5226                 goto out;
5227         }
5228
5229         /* Tag the fileglob for debugging purposes */
5230         lck_mtx_lock_spin(&fg->fg_lock);
5231         fg->fg_lflags |= FG_PORTMADE;
5232         lck_mtx_unlock(&fg->fg_lock);
5233
5234         fp_drop(p, fd, fp, 0);
5235
5236         return 0;
5237
5238 out:
5239         if (MACH_PORT_VALID(name)) {
5240                 /* Don't care if another thread races us to deallocate the entry */
5241                 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5242         }
5243
5244         if (fp != FILEPROC_NULL) {
5245                 fp_drop(p, fd, fp, 0);
5246         }
5247
5248         return err;
5249 }
5250
5251 void
5252 fileport_releasefg(struct fileglob *fg)
5253 {
5254         (void)closef_locked(NULL, fg, PROC_NULL);
5255
5256         return;
5257 }
5258
5259
5260 /*
5261  * fileport_makefd
5262  *
5263  * Description: Obtain the file descriptor for a given Mach send right.
5264  *
5265  * Parameters:  p               Process calling fileport
5266  *              uap->port       Name of send right to file port.
5267  *
5268  * Returns:     0               Success
5269  *              EINVAL          Invalid Mach port name, or port is not for a file.
5270  *      fdalloc:EMFILE
5271  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
5272  *
5273  * Implicit returns:
5274  *              *retval (modified)              The new descriptor
5275  */
5276 int
5277 fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5278 {
5279         struct fileglob *fg;
5280         struct fileproc *fp = FILEPROC_NULL;
5281         ipc_port_t port = IPC_PORT_NULL;
5282         mach_port_name_t send = uap->port;
5283         kern_return_t res;
5284         int fd;
5285         int err;
5286
5287         res = ipc_object_copyin(get_task_ipcspace(p->task),
5288                         send, MACH_MSG_TYPE_COPY_SEND, &port);
5289
5290         if (res != KERN_SUCCESS) {
5291                 err = EINVAL;
5292                 goto out;
5293         }
5294
5295         fg = fileport_port_to_fileglob(port);
5296         if (fg == NULL) {
5297                 err = EINVAL;
5298                 goto out;
5299         }
5300
5301         fp = fileproc_alloc_init(NULL);
5302         if (fp == FILEPROC_NULL) {
5303                 err = ENOMEM;
5304                 goto out;
5305         }
5306
5307         fp->f_fglob = fg;
5308         fg_ref(fp);
5309
5310         proc_fdlock(p);
5311         err = fdalloc(p, 0, &fd);
5312         if (err != 0) {
5313                 proc_fdunlock(p);
5314                 goto out;
5315         }
5316         *fdflags(p, fd) |= UF_EXCLOSE;
5317
5318         procfdtbl_releasefd(p, fd, fp);
5319         proc_fdunlock(p);
5320
5321         *retval = fd;
5322         err = 0;
5323 out:
5324         if ((fp != NULL) && (0 != err)) {
5325                 fileproc_free(fp);
5326         }
5327
5328         if (IPC_PORT_NULL != port) {
5329                 ipc_port_release_send(port);
5330         }
5331
5332         return err;
5333 }
5334
5335
5336 /*
5337  * dupfdopen
5338  *
5339  * Description: Duplicate the specified descriptor to a free descriptor;
5340  *              this is the second half of fdopen(), above.
5341  *
5342  * Parameters:  fdp                             filedesc pointer to fill in
5343  *              indx                            fd to dup to
5344  *              dfd                             fd to dup from
5345  *              mode                            mode to set on new fd
5346  *              error                           command code
5347  *
5348  * Returns:     0                               Success
5349  *              EBADF                           Source fd is bad
5350  *              EACCES                          Requested mode not allowed
5351  *              !0                              'error', if not ENODEV or
5352  *                                              ENXIO
5353  *
5354  * Notes:       XXX This is not thread safe; see fdopen() above
5355  */
5356 int
5357 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5358 {
5359         struct fileproc *wfp;
5360         struct fileproc *fp;
5361 #if CONFIG_MACF
5362         int myerror;
5363 #endif
5364         proc_t p = current_proc();
5365
5366         /*
5367          * If the to-be-dup'd fd number is greater than the allowed number
5368          * of file descriptors, or the fd to be dup'd has already been
5369          * closed, reject.  Note, check for new == old is necessary as
5370          * falloc could allocate an already closed to-be-dup'd descriptor
5371          * as the new descriptor.
5372          */
5373         proc_fdlock(p);
5374
5375         fp = fdp->fd_ofiles[indx];
5376         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5377                         (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5378                         (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5379
5380                 proc_fdunlock(p);
5381                 return (EBADF);
5382         }
5383 #if CONFIG_MACF
5384         myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5385         if (myerror) {
5386                 proc_fdunlock(p);
5387                 return (myerror);
5388         }
5389 #endif
5390         /*
5391          * There are two cases of interest here.
5392          *
5393          * For ENODEV simply dup (dfd) to file descriptor
5394          * (indx) and return.
5395          *
5396          * For ENXIO steal away the file structure from (dfd) and
5397          * store it in (indx).  (dfd) is effectively closed by
5398          * this operation.
5399          *
5400          * Any other error code is just returned.
5401          */
5402         switch (error) {
5403         case ENODEV:
5404                 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5405                         int err = fp_guard_exception(p,
5406                             dfd, wfp, kGUARD_EXC_DUP);
5407                         proc_fdunlock(p);
5408                         return (err);
5409                 }
5410
5411                 /*
5412                  * Check that the mode the file is being opened for is a
5413                  * subset of the mode of the existing descriptor.
5414                  */
5415                 if (((flags & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5416                         proc_fdunlock(p);
5417                         return (EACCES);
5418                 }
5419                 if (indx > fdp->fd_lastfile)
5420                         fdp->fd_lastfile = indx;
5421                 (void)fg_ref(wfp);
5422
5423                 if (fp->f_fglob)
5424                         fg_free(fp->f_fglob);
5425                 fp->f_fglob = wfp->f_fglob;
5426
5427                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5428                         (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5429
5430                 proc_fdunlock(p);
5431                 return (0);
5432
5433         default:
5434                 proc_fdunlock(p);
5435                 return (error);
5436         }
5437         /* NOTREACHED */
5438 }
5439
5440
5441 /*
5442  * fg_ref
5443  *
5444  * Description: Add a reference to a fileglob by fileproc
5445  *
5446  * Parameters:  fp                              fileproc containing fileglob
5447  *                                              pointer
5448  *
5449  * Returns:     void
5450  *
5451  * Notes:       XXX Should use OSAddAtomic?
5452  */
5453 void
5454 fg_ref(struct fileproc * fp)
5455 {
5456         struct fileglob *fg;
5457
5458         fg = fp->f_fglob;
5459
5460         lck_mtx_lock_spin(&fg->fg_lock);
5461
5462 #if DIAGNOSTIC
5463         if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
5464                 panic("fg_ref: invalid bits on fp %p", fp);
5465
5466         if (fg->fg_count == 0)
5467                 panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5468                     fp, fg);
5469 #endif
5470         fg->fg_count++;
5471         lck_mtx_unlock(&fg->fg_lock);
5472 }
5473
5474
5475 /*
5476  * fg_drop
5477  *
5478  * Description: Remove a reference to a fileglob by fileproc
5479  *
5480  * Parameters:  fp                              fileproc containing fileglob
5481  *                                              pointer
5482  *
5483  * Returns:     void
5484  *
5485  * Notes:       XXX Should use OSAddAtomic?
5486  */
5487 void
5488 fg_drop(struct fileproc * fp)
5489 {
5490         struct fileglob *fg;
5491
5492         fg = fp->f_fglob;
5493         lck_mtx_lock_spin(&fg->fg_lock);
5494         fg->fg_count--;
5495         lck_mtx_unlock(&fg->fg_lock);
5496 }
5497
5498 #if SOCKETS
5499 /*
5500  * fg_insertuipc
5501  *
5502  * Description: Insert fileglob onto message queue
5503  *
5504  * Parameters:  fg                              Fileglob pointer to insert
5505  *
5506  * Returns:     void
5507  *
5508  * Locks:       Takes and drops fg_lock, potentially many times
5509  */
5510 void
5511 fg_insertuipc(struct fileglob * fg)
5512 {
5513         int insertque = 0;
5514
5515         lck_mtx_lock_spin(&fg->fg_lock);
5516
5517         while (fg->fg_lflags & FG_RMMSGQ) {
5518                 lck_mtx_convert_spin(&fg->fg_lock);
5519
5520                 fg->fg_lflags |= FG_WRMMSGQ;
5521                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
5522         }
5523
5524         fg->fg_count++;
5525         fg->fg_msgcount++;
5526         if (fg->fg_msgcount == 1) {
5527                 fg->fg_lflags |= FG_INSMSGQ;
5528                 insertque=1;
5529         }
5530         lck_mtx_unlock(&fg->fg_lock);
5531
5532         if (insertque) {
5533                 lck_mtx_lock_spin(uipc_lock);
5534                 unp_gc_wait();
5535                 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
5536                 lck_mtx_unlock(uipc_lock);
5537                 lck_mtx_lock(&fg->fg_lock);
5538                 fg->fg_lflags &= ~FG_INSMSGQ;
5539                 if (fg->fg_lflags & FG_WINSMSGQ) {
5540                         fg->fg_lflags &= ~FG_WINSMSGQ;
5541                         wakeup(&fg->fg_lflags);
5542                 }
5543                 lck_mtx_unlock(&fg->fg_lock);
5544         }
5545
5546 }
5547
5548
5549 /*
5550  * fg_removeuipc
5551  *
5552  * Description: Remove fileglob from message queue
5553  *
5554  * Parameters:  fg                              Fileglob pointer to remove
5555  *
5556  * Returns:     void
5557  *
5558  * Locks:       Takes and drops fg_lock, potentially many times
5559  */
5560 void
5561 fg_removeuipc(struct fileglob * fg)
5562 {
5563         int removeque = 0;
5564
5565         lck_mtx_lock_spin(&fg->fg_lock);
5566         while (fg->fg_lflags & FG_INSMSGQ) {
5567                 lck_mtx_convert_spin(&fg->fg_lock);
5568
5569                 fg->fg_lflags |= FG_WINSMSGQ;
5570                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
5571         }
5572         fg->fg_msgcount--;
5573         if (fg->fg_msgcount == 0) {
5574                 fg->fg_lflags |= FG_RMMSGQ;
5575                 removeque=1;
5576         }
5577         lck_mtx_unlock(&fg->fg_lock);
5578
5579         if (removeque) {
5580                 lck_mtx_lock_spin(uipc_lock);
5581                 unp_gc_wait();
5582                 LIST_REMOVE(fg, f_msglist);
5583                 lck_mtx_unlock(uipc_lock);
5584                 lck_mtx_lock(&fg->fg_lock);
5585                 fg->fg_lflags &= ~FG_RMMSGQ;
5586                 if (fg->fg_lflags & FG_WRMMSGQ) {
5587                         fg->fg_lflags &= ~FG_WRMMSGQ;
5588                         wakeup(&fg->fg_lflags);
5589                 }
5590                 lck_mtx_unlock(&fg->fg_lock);
5591         }
5592 }
5593 #endif /* SOCKETS */
5594
5595 /*
5596  * fo_read
5597  *
5598  * Description: Generic fileops read indirected through the fileops pointer
5599  *              in the fileproc structure
5600  *
5601  * Parameters:  fp                              fileproc structure pointer
5602  *              uio                             user I/O structure pointer
5603  *              flags                           FOF_ flags
5604  *              ctx                             VFS context for operation
5605  *
5606  * Returns:     0                               Success
5607  *              !0                              Errno from read
5608  */
5609 int
5610 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5611 {
5612         return ((*fp->f_ops->fo_read)(fp, uio, flags, ctx));
5613 }
5614
5615
5616 /*
5617  * fo_write
5618  *
5619  * Description: Generic fileops write indirected through the fileops pointer
5620  *              in the fileproc structure
5621  *
5622  * Parameters:  fp                              fileproc structure pointer
5623  *              uio                             user I/O structure pointer
5624  *              flags                           FOF_ flags
5625  *              ctx                             VFS context for operation
5626  *
5627  * Returns:     0                               Success
5628  *              !0                              Errno from write
5629  */
5630 int
5631 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5632 {
5633         return((*fp->f_ops->fo_write)(fp, uio, flags, ctx));
5634 }
5635
5636
5637 /*
5638  * fo_ioctl
5639  *
5640  * Description: Generic fileops ioctl indirected through the fileops pointer
5641  *              in the fileproc structure
5642  *
5643  * Parameters:  fp                              fileproc structure pointer
5644  *              com                             ioctl command
5645  *              data                            pointer to internalized copy
5646  *                                              of user space ioctl command
5647  *                                              parameter data in kernel space
5648  *              ctx                             VFS context for operation
5649  *
5650  * Returns:     0                               Success
5651  *              !0                              Errno from ioctl
5652  *
5653  * Locks:       The caller is assumed to have held the proc_fdlock; this
5654  *              function releases and reacquires this lock.  If the caller
5655  *              accesses data protected by this lock prior to calling this
5656  *              function, it will need to revalidate/reacquire any cached
5657  *              protected data obtained prior to the call.
5658  */
5659 int
5660 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5661 {
5662         int error;
5663
5664         proc_fdunlock(vfs_context_proc(ctx));
5665         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5666         proc_fdlock(vfs_context_proc(ctx));
5667         return(error);
5668 }
5669
5670
5671 /*
5672  * fo_select
5673  *
5674  * Description: Generic fileops select indirected through the fileops pointer
5675  *              in the fileproc structure
5676  *
5677  * Parameters:  fp                              fileproc structure pointer
5678  *              which                           select which
5679  *              wql                             pointer to wait queue list
5680  *              ctx                             VFS context for operation
5681  *
5682  * Returns:     0                               Success
5683  *              !0                              Errno from select
5684  */
5685 int
5686 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5687 {
5688         return((*fp->f_ops->fo_select)(fp, which, wql, ctx));
5689 }
5690
5691
5692 /*
5693  * fo_close
5694  *
5695  * Description: Generic fileops close indirected through the fileops pointer
5696  *              in the fileproc structure
5697  *
5698  * Parameters:  fp                              fileproc structure pointer for
5699  *                                              file to close
5700  *              ctx                             VFS context for operation
5701  *
5702  * Returns:     0                               Success
5703  *              !0                              Errno from close
5704  */
5705 int
5706 fo_close(struct fileglob *fg, vfs_context_t ctx)
5707 {
5708         return((*fg->fg_ops->fo_close)(fg, ctx));
5709 }
5710
5711
5712 /*
5713  * fo_kqfilter
5714  *
5715  * Description: Generic fileops kqueue filter indirected through the fileops
5716  *              pointer in the fileproc structure
5717  *
5718  * Parameters:  fp                              fileproc structure pointer
5719  *              kn                              pointer to knote to filter on
5720  *              ctx                             VFS context for operation
5721  *
5722  * Returns:     0                               Success
5723  *              !0                              Errno from kqueue filter
5724  */
5725 int
5726 fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
5727 {
5728         return ((*fp->f_ops->fo_kqfilter)(fp, kn, ctx));
5729 }
5730
5731 /*
5732  * The ability to send a file descriptor to another
5733  * process is opt-in by file type.
5734  */
5735 boolean_t
5736 filetype_issendable(file_type_t fdtype)
5737 {
5738         switch (fdtype) {
5739                 case DTYPE_VNODE:
5740                 case DTYPE_SOCKET:
5741                 case DTYPE_PIPE:
5742                 case DTYPE_PSXSHM:
5743                         return TRUE;
5744                 default:
5745                         /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
5746                         return FALSE;
5747         }
5748 }
5749
5750
5751 struct fileproc *
5752 fileproc_alloc_init(__unused void *arg)
5753 {
5754         struct fileproc *fp;
5755
5756         MALLOC_ZONE(fp, struct fileproc *, sizeof (*fp), M_FILEPROC, M_WAITOK);
5757         if (fp)
5758                 bzero(fp, sizeof (*fp));
5759
5760         return (fp);
5761 }
5762
5763 void
5764 fileproc_free(struct fileproc *fp)
5765 {
5766         switch (FILEPROC_TYPE(fp)) {
5767         case FTYPE_SIMPLE:
5768                 FREE_ZONE(fp, sizeof (*fp), M_FILEPROC);
5769                 break;
5770         case FTYPE_GUARDED:
5771                 guarded_fileproc_free(fp);
5772                 break;
5773         default:
5774                 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags);
5775         }
5776 }