bsd/kern/kern_descrip.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/vnode_internal.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/file_internal.h>
  83 #include <sys/guarded.h>
  84 #include <sys/socket.h>
  85 #include <sys/socketvar.h>
  86 #include <sys/stat.h>
  87 #include <sys/ioctl.h>
  88 #include <sys/fcntl.h>
  89 #include <sys/malloc.h>
  90 #include <sys/mman.h>
  91 #include <sys/syslog.h>
  92 #include <sys/unistd.h>
  93 #include <sys/resourcevar.h>
  94 #include <sys/aio_kern.h>
  95 #include <sys/ev.h>
  96 #include <kern/locks.h>
  97 #include <sys/uio_internal.h>
  98 #include <sys/codesign.h>
  99 #include <sys/codedir_internal.h>
 100
 101 #include <security/audit/audit.h>
 102
 103 #include <sys/mount_internal.h>
 104 #include <sys/kdebug.h>
 105 #include <sys/sysproto.h>
 106 #include <sys/pipe.h>
 107 #include <sys/spawn.h>
 108 #include <kern/kern_types.h>
 109 #include <kern/kalloc.h>
 110 #include <kern/waitq.h>
 111 #include <libkern/OSAtomic.h>
 112
 113 #include <sys/ubc_internal.h>
 114
 115 #include <kern/ipc_misc.h>
 116 #include <vm/vm_protos.h>
 117
 118 #include <mach/mach_port.h>
 119 #include <stdbool.h>
 120
 121 #include <hfs/hfs.h>
 122
/*
 * Mach IPC routines used by this file.  They are prototyped here directly
 * rather than through a header.
 */
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *);
void ipc_port_release_send(ipc_port_t);

/* Incomplete struct types; only the tags are needed at this point. */
struct psemnode;
struct pshmnode;

/* File-local helper shared by dup(), dup2() and fcntl(F_DUPFD*). */
static int finishdup(proc_t p,
    struct filedesc *fdp, int old, int new, int flags, int32_t *retval);

/* Non-static helpers prototyped here for use within this file. */
int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
void fg_drop(struct fileproc * fp);
void fg_free(struct fileglob *fg);
void fg_ref(struct fileproc * fp);
void fileport_releasefg(struct fileglob *fg);

/*
 * flags for close_internal_locked
 *
 * FD_DUP2RESV is what dup2() passes when it closes the file currently
 * occupying its target descriptor.
 */
#define FD_DUP2RESV 1

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

/* Releases a descriptor slot; defined further down in this file. */
static void _fdrelse(struct proc * p, int fd);


/* Defined below; allocates the file lock group, attributes and uipc_lock. */
extern void file_lock_init(void);

extern kauth_scope_t    kauth_scope_fileop;

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;
 156
/*
 * Shorthand accessors.  Each name expands to a member path that starts at
 * f_fglob, so "fp->f_flag" reads fp->f_fglob->fg_flag, and so on.  Note that
 * f_type goes one level further, through the fg_ops table (fo_type).
 *
 * Because these are plain object-like macros, no local variable or struct
 * member in this file may be given one of these names.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
 164 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
 165                 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
 166                 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
 167                 ? 1 : 0)
 168 /*
 169  * Descriptor management.
 170  */
struct fmsglist fmsghead;       /* head of list of open files */
/*
 * NOTE(review): the comment below is word-for-word the same as the one on
 * fmsghead; what distinguishes this second list is not visible here --
 * confirm against the code that links entries onto it.
 */
struct fmsglist fmsg_ithead;    /* head of list of open files */
int nfiles;                     /* actual number of open files */


/*
 * Lock group, group attribute and lock attribute for file locks.  All three
 * are allocated once by file_lock_init().
 */
lck_grp_attr_t * file_lck_grp_attr;
lck_grp_t * file_lck_grp;
lck_attr_t * file_lck_attr;

/* Mutex allocated in file_lock_init() from the group/attribute above. */
lck_mtx_t * uipc_lock;
 181
 182
 183 /*
 184  * check_file_seek_range
 185  *
 186  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
 187  *
 188  * Parameters:  fl              Flock structure.
 189  *              cur_file_offset Current offset in the file.
 190  *
 191  * Returns:     0               on Success.
 192  *              EOVERFLOW       on overflow.
 193  *              EINVAL          on offset less than zero.
 194  */
 195
 196 static int
 197 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
 198 {
 199         if (fl->l_whence == SEEK_CUR) {
 200                 /* Check if the start marker is beyond LLONG_MAX. */
 201                 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
 202                         /* Check if start marker is negative */
 203                         if (fl->l_start < 0) {
 204                                 return EINVAL;
 205                         }
 206                         return EOVERFLOW;
 207                 }
 208                 /* Check if the start marker is negative. */
 209                 if (fl->l_start + cur_file_offset < 0) {
 210                         return EINVAL;
 211                 }
 212                 /* Check if end marker is beyond LLONG_MAX. */
 213                 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
 214                         cur_file_offset, fl->l_len - 1))) {
 215                         return EOVERFLOW;
 216                 }
 217                 /* Check if the end marker is negative. */
 218                 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
 219                         fl->l_len < 0)) {
 220                         return EINVAL;
 221                 }
 222         } else if (fl->l_whence == SEEK_SET) {
 223                 /* Check if the start marker is negative. */
 224                 if (fl->l_start < 0) {
 225                         return EINVAL;
 226                 }
 227                 /* Check if the end marker is beyond LLONG_MAX. */
 228                 if ((fl->l_len > 0) &&
 229                     CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
 230                         return EOVERFLOW;
 231                 }
 232                 /* Check if the end marker is negative. */
 233                 if ((fl->l_len < 0) &&  fl->l_start + fl->l_len < 0) {
 234                         return EINVAL;
 235                 }
 236         }
 237         return 0;
 238 }
 239
 240
 241 /*
 242  * file_lock_init
 243  *
 244  * Description: Initialize the file lock group and the uipc and flist locks
 245  *
 246  * Parameters:  (void)
 247  *
 248  * Returns:     void
 249  *
 250  * Notes:       Called at system startup from bsd_init().
 251  */
 252 void
 253 file_lock_init(void)
 254 {
 255         /* allocate file lock group attribute and group */
 256         file_lck_grp_attr= lck_grp_attr_alloc_init();
 257
 258         file_lck_grp = lck_grp_alloc_init("file",  file_lck_grp_attr);
 259
 260         /* Allocate file lock attribute */
 261         file_lck_attr = lck_attr_alloc_init();
 262
 263         uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
 264 }
 265
 266
 267 /*
 268  * proc_fdlock, proc_fdlock_spin
 269  *
 270  * Description: Lock to control access to the per process struct fileproc
 271  *              and struct filedesc
 272  *
 273  * Parameters:  p                               Process to take the lock on
 274  *
 275  * Returns:     void
 276  *
 277  * Notes:       The lock is initialized in forkproc() and destroyed in
 278  *              reap_child_process().
 279  */
void
proc_fdlock(proc_t p)
{
        /* Acquire the process's descriptor-table mutex (p_fdmlock). */
        lck_mtx_lock(&p->p_fdmlock);
}
 285
/*
 * Same mutex as proc_fdlock(), but acquired through lck_mtx_lock_spin().
 * Released with proc_fdunlock(), as getdtablesize() does.
 */
void
proc_fdlock_spin(proc_t p)
{
        lck_mtx_lock_spin(&p->p_fdmlock);
}
 291
/*
 * proc_fdlock_assert
 *
 * Description: Assert the state of the per process fd mutex.
 *
 * Parameters:  p                               Process whose p_fdmlock is
 *                                              checked
 *              assertflags                     Passed unchanged to
 *                                              lck_mtx_assert(), e.g.
 *                                              LCK_MTX_ASSERT_OWNED
 *
 * Returns:     void
 */
void
proc_fdlock_assert(proc_t p, int assertflags)
{
        lck_mtx_assert(&p->p_fdmlock, assertflags);
}
 297
 298
 299 /*
 300  * proc_fdunlock
 301  *
 302  * Description: Unlock the lock previously locked by a call to proc_fdlock()
 303  *
 304  * Parameters:  p                               Process to drop the lock on
 305  *
 306  * Returns:     void
 307  */
void
proc_fdunlock(proc_t p)
{
        /* Release p_fdmlock, whichever of the lock variants acquired it. */
        lck_mtx_unlock(&p->p_fdmlock);
}
 313
 314
 315 /*
 316  * System calls on descriptors.
 317  */
 318
 319
 320 /*
 321  * getdtablesize
 322  *
 323  * Description: Returns the per process maximum size of the descriptor table
 324  *
 325  * Parameters:  p                               Process being queried
 326  *              retval                          Pointer to the call return area
 327  *
 328  * Returns:     0                               Success
 329  *
 330  * Implicit returns:
 331  *              *retval (modified)              Size of dtable
 332  */
 333 int
 334 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
 335 {
 336         proc_fdlock_spin(p);
 337         *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 338         proc_fdunlock(p);
 339
 340         return (0);
 341 }
 342
 343
 344 void
 345 procfdtbl_reservefd(struct proc * p, int fd)
 346 {
 347         p->p_fd->fd_ofiles[fd] = NULL;
 348         p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
 349 }
 350
 351 void
 352 procfdtbl_markclosefd(struct proc * p, int fd)
 353 {
 354         p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
 355 }
 356
 357 void
 358 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
 359 {
 360         if (fp != NULL)
 361                 p->p_fd->fd_ofiles[fd] = fp;
 362         p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
 363         if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
 364                 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
 365                 wakeup(&p->p_fd);
 366         }
 367 }
 368
 369 void
 370 procfdtbl_waitfd(struct proc * p, int fd)
 371 {
 372         p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
 373         msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
 374 }
 375
 376
 377 void
 378 procfdtbl_clearfd(struct proc * p, int fd)
 379 {
 380         int waiting;
 381
 382         waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
 383         p->p_fd->fd_ofiles[fd] = NULL;
 384         p->p_fd->fd_ofileflags[fd] = 0;
 385         if ( waiting == UF_RESVWAIT) {
 386                 wakeup(&p->p_fd);
 387         }
 388 }
 389
 390 /*
 391  * _fdrelse
 392  *
 393  * Description: Inline utility function to free an fd in a filedesc
 394  *
 395  * Parameters:  fdp                             Pointer to filedesc fd lies in
 396  *              fd                              fd to free
 397  *              reserv                          fd should be reserved
 398  *
 399  * Returns:     void
 400  *
 401  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
 402  *              the caller
 403  */
 404 static void
 405 _fdrelse(struct proc * p, int fd)
 406 {
 407         struct filedesc *fdp = p->p_fd;
 408         int nfd = 0;
 409
 410         if (fd < fdp->fd_freefile)
 411                 fdp->fd_freefile = fd;
 412 #if DIAGNOSTIC
 413         if (fd > fdp->fd_lastfile)
 414                 panic("fdrelse: fd_lastfile inconsistent");
 415 #endif
 416         procfdtbl_clearfd(p, fd);
 417
 418         while ((nfd = fdp->fd_lastfile) > 0 &&
 419                         fdp->fd_ofiles[nfd] == NULL &&
 420                         !(fdp->fd_ofileflags[nfd] & UF_RESERVED))
 421                 fdp->fd_lastfile--;
 422 }
 423
 424
 425 int
 426 fd_rdwr(
 427         int fd,
 428         enum uio_rw rw,
 429         uint64_t base,
 430         int64_t len,
 431         enum uio_seg segflg,
 432         off_t   offset,
 433         int     io_flg,
 434         int64_t *aresid)
 435 {
 436         struct fileproc *fp;
 437         proc_t  p;
 438         int error = 0;
 439         int flags = 0;
 440         int spacetype;
 441         uio_t auio = NULL;
 442         char uio_buf[ UIO_SIZEOF(1) ];
 443         struct vfs_context context = *(vfs_context_current());
 444         bool wrote_some = false;
 445
 446         p = current_proc();
 447
 448         error = fp_lookup(p, fd, &fp, 0);
 449         if (error)
 450                 return(error);
 451
 452         if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
 453                 error = EINVAL;
 454                 goto out;
 455         }
 456         if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
 457                 error = EBADF;
 458                 goto out;
 459         }
 460
 461         if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
 462                 error = EBADF;
 463                 goto out;
 464         }
 465
 466         context.vc_ucred = fp->f_fglob->fg_cred;
 467
 468         if (UIO_SEG_IS_USER_SPACE(segflg))
 469                 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
 470         else
 471                 spacetype = UIO_SYSSPACE;
 472
 473         auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
 474
 475         uio_addiov(auio, base, len);
 476
 477         if ( !(io_flg & IO_APPEND))
 478                 flags = FOF_OFFSET;
 479
 480         if (rw == UIO_WRITE) {
 481                 user_ssize_t orig_resid = uio_resid(auio);
 482                 error = fo_write(fp, auio, flags, &context);
 483                 wrote_some = uio_resid(auio) < orig_resid;
 484         } else
 485                 error = fo_read(fp, auio, flags, &context);
 486
 487         if (aresid)
 488                 *aresid = uio_resid(auio);
 489         else {
 490                 if (uio_resid(auio) && error == 0)
 491                         error = EIO;
 492         }
 493 out:
 494         if (wrote_some)
 495                 fp_drop_written(p, fd, fp);
 496         else
 497                 fp_drop(p, fd, fp, 0);
 498
 499         return error;
 500 }
 501
 502
 503
 504 /*
 505  * dup
 506  *
 507  * Description: Duplicate a file descriptor.
 508  *
 509  * Parameters:  p                               Process performing the dup
 510  *              uap->fd                         The fd to dup
 511  *              retval                          Pointer to the call return area
 512  *
 513  * Returns:     0                               Success
 514  *              !0                              Errno
 515  *
 516  * Implicit returns:
 517  *              *retval (modified)              The new descriptor
 518  */
 519 int
 520 dup(proc_t p, struct dup_args *uap, int32_t *retval)
 521 {
 522         struct filedesc *fdp = p->p_fd;
 523         int old = uap->fd;
 524         int new, error;
 525         struct fileproc *fp;
 526
 527         proc_fdlock(p);
 528         if ( (error = fp_lookup(p, old, &fp, 1)) ) {
 529                 proc_fdunlock(p);
 530                 return(error);
 531         }
 532         if (FP_ISGUARDED(fp, GUARD_DUP)) {
 533                 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
 534                 (void) fp_drop(p, old, fp, 1);
 535                 proc_fdunlock(p);
 536                 return (error);
 537         }
 538         if ( (error = fdalloc(p, 0, &new)) ) {
 539                 fp_drop(p, old, fp, 1);
 540                 proc_fdunlock(p);
 541                 return (error);
 542         }
 543         error = finishdup(p, fdp, old, new, 0, retval);
 544         fp_drop(p, old, fp, 1);
 545         proc_fdunlock(p);
 546
 547         if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
 548                 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
 549                     new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
 550         }
 551
 552         return (error);
 553 }
 554
 555 /*
 556  * dup2
 557  *
 558  * Description: Duplicate a file descriptor to a particular value.
 559  *
 560  * Parameters:  p                               Process performing the dup
 561  *              uap->from                       The fd to dup
 562  *              uap->to                         The fd to dup it to
 563  *              retval                          Pointer to the call return area
 564  *
 565  * Returns:     0                               Success
 566  *              !0                              Errno
 567  *
 568  * Implicit returns:
 569  *              *retval (modified)              The new descriptor
 570  */
int
dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
{
        struct filedesc *fdp = p->p_fd;
        int old = uap->from, new = uap->to;
        int i, error;
        struct fileproc *fp, *nfp;

        /* The fd lock is held from here to every return below. */
        proc_fdlock(p);

        /*
         * Restart point.  Every path that jumps back here has already
         * dropped its reference on 'old', so the source descriptor is looked
         * up (and re-validated) from scratch each time round.
         */
startover:
        if ( (error = fp_lookup(p, old, &fp, 1)) ) {
                proc_fdunlock(p);
                return(error);
        }
        /* A descriptor guarded against dup raises a guard exception. */
        if (FP_ISGUARDED(fp, GUARD_DUP)) {
                error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
                (void) fp_drop(p, old, fp, 1);
                proc_fdunlock(p);
                return (error);
        }
        /*
         * The target must be non-negative and below both the process's
         * RLIMIT_NOFILE soft limit and the global maxfiles.
         */
        if (new < 0 ||
                (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
            new >= maxfiles) {
                fp_drop(p, old, fp, 1);
                proc_fdunlock(p);
                return (EBADF);
        }
        /* dup2(fd, fd) on a valid fd: nothing to do, report fd itself. */
        if (old == new) {
                fp_drop(p, old, fp, 1);
                *retval = new;
                proc_fdunlock(p);
                return (0);
        }
        /*
         * Target lies beyond the current table size: ask fdalloc() for a
         * descriptor starting at 'new'.  (new < 0 was already rejected
         * above, so only the second half of this test can be true.)
         */
        if (new < 0 || new >= fdp->fd_nfiles) {
                if ( (error = fdalloc(p, new, &i)) ) {
                        fp_drop(p, old, fp, 1);
                        proc_fdunlock(p);
                        return (error);
                }
                /*
                 * fdalloc() handed back a different slot: give it back and
                 * treat 'new' like an in-range target.  Note the jump lands
                 * inside the else branch below.
                 */
                if (new != i) {
                        fdrelse(p, i);
                        goto closeit;
                }
        } else {
closeit:
                /*
                 * Someone else holds a reservation on the target slot.  Drop
                 * our reference on 'old', sleep until the slot is released,
                 * and start again.  This loop body runs at most once per
                 * visit because it ends in a goto.
                 */
                while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED)  {
                                fp_drop(p, old, fp, 1);
                                procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
                                proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
                                goto startover;
                }

                /*
                 * Target slot currently holds an open file: close it, then
                 * restart so that 'old' is validated again afterwards.
                 */
                if ((fdp->fd_ofiles[new] != NULL) &&
                    ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
                        fp_drop(p, old, fp, 1);
                        /* A close-guarded target may not be closed by dup2. */
                        if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
                                error = fp_guard_exception(p,
                                    new, nfp, kGUARD_EXC_CLOSE);
                                (void) fp_drop(p, new, nfp, 1);
                                proc_fdunlock(p);
                                return (error);
                        }
                        /*
                         * The close result is deliberately ignored.
                         * NOTE(review): FD_DUP2RESV presumably tells the
                         * close path to leave the slot for us to clear --
                         * confirm in close_internal_locked().
                         */
                        (void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
#if DIAGNOSTIC
                        proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
                        procfdtbl_clearfd(p, new);
                        goto startover;
                } else  {
#if DIAGNOSTIC
                        if (fdp->fd_ofiles[new] != NULL)
                                panic("dup2: no ref on fileproc %d", new);
#endif
                        /* Slot is free: reserve it for finishdup() below. */
                        procfdtbl_reservefd(p, new);
                }

#if DIAGNOSTIC
                proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

        }
        /*
         * Both ways of reaching this point are expected to leave the target
         * slot empty and reserved; the DIAGNOSTIC checks below assert that.
         */
#if DIAGNOSTIC
        if (fdp->fd_ofiles[new] != 0)
                panic("dup2: overwriting fd_ofiles with new %d", new);
        if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
                panic("dup2: unreserved fileflags with new %d", new);
#endif
        error = finishdup(p, fdp, old, new, 0, retval);
        fp_drop(p, old, fp, 1);
        proc_fdunlock(p);

        return(error);
}
 667
 668
 669 /*
 670  * fcntl
 671  *
 672  * Description: The file control system call.
 673  *
 674  * Parameters:  p                               Process performing the fcntl
 675  *              uap->fd                         The fd to operate against
 676  *              uap->cmd                        The command to perform
 677  *              uap->arg                        Pointer to the command argument
 678  *              retval                          Pointer to the call return area
 679  *
 680  * Returns:     0                               Success
 681  *              !0                              Errno (see fcntl_nocancel)
 682  *
 683  * Implicit returns:
 684  *              *retval (modified)              fcntl return value (if any)
 685  *
 686  * Notes:       This system call differs from fcntl_nocancel() in that it
 687  *              tests for cancellation prior to performing a potentially
 688  *              blocking operation.
 689  */
int
fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
        /* Cancellation test comes first; all real work is delegated. */
        __pthread_testcancel(1);
        /*
         * The argument block is passed through reinterpreted as
         * fcntl_nocancel_args.
         * NOTE(review): this relies on the two argument structures sharing
         * a layout -- confirm against their generated definitions.
         */
        return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval));
}
 696
 697
 698 /*
 699  * fcntl_nocancel
 700  *
 701  * Description: A non-cancel-testing file control system call.
 702  *
 703  * Parameters:  p                               Process performing the fcntl
 704  *              uap->fd                         The fd to operate against
 705  *              uap->cmd                        The command to perform
 706  *              uap->arg                        Pointer to the command argument
 707  *              retval                          Pointer to the call return area
 708  *
 709  * Returns:     0                               Success
 710  *              EINVAL
 711  *      fp_lookup:EBADF                         Bad file descriptor
 712  * [F_DUPFD]
 713  *      fdalloc:EMFILE
 714  *      fdalloc:ENOMEM
 715  *      finishdup:EBADF
 716  *      finishdup:ENOMEM
 717  * [F_SETOWN]
 718  *              ESRCH
 719  * [F_SETLK]
 720  *              EBADF
 721  *              EOVERFLOW
 722  *      copyin:EFAULT
 723  *      vnode_getwithref:???
 724  *      VNOP_ADVLOCK:???
 725  *      msleep:ETIMEDOUT
 726  * [F_GETLK]
 727  *              EBADF
 728  *              EOVERFLOW
 729  *      copyin:EFAULT
 730  *      copyout:EFAULT
 731  *      vnode_getwithref:???
 732  *      VNOP_ADVLOCK:???
 733  * [F_PREALLOCATE]
 734  *              EBADF
 735  *              EINVAL
 736  *      copyin:EFAULT
 737  *      copyout:EFAULT
 738  *      vnode_getwithref:???
 739  *      VNOP_ALLOCATE:???
 740  * [F_SETSIZE,F_RDADVISE]
 741  *              EBADF
 742  *      copyin:EFAULT
 743  *      vnode_getwithref:???
 744  * [F_RDAHEAD,F_NOCACHE]
 745  *              EBADF
 746  *      vnode_getwithref:???
 747  * [???]
 748  *
 749  * Implicit returns:
 750  *              *retval (modified)              fcntl return value (if any)
 751  */
 752 int
 753 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 754 {
 755         int fd = uap->fd;
 756         struct filedesc *fdp = p->p_fd;
 757         struct fileproc *fp;
 758         char *pop;
 759         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
 760         int i, tmp, error, error2, flg = 0;
 761         struct flock fl;
 762         struct flocktimeout fltimeout;
 763         struct timespec *timeout = NULL;
 764         struct vfs_context context;
 765         off_t offset;
 766         int newmin;
 767         daddr64_t lbn, bn;
 768         unsigned int fflag;
 769         user_addr_t argp;
 770         boolean_t is64bit;
 771
 772         AUDIT_ARG(fd, uap->fd);
 773         AUDIT_ARG(cmd, uap->cmd);
 774
 775         proc_fdlock(p);
 776         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
 777                 proc_fdunlock(p);
 778                 return(error);
 779         }
 780         context.vc_thread = current_thread();
 781         context.vc_ucred = fp->f_cred;
 782
 783         is64bit = proc_is64bit(p);
 784         if (is64bit) {
 785                 argp = uap->arg;
 786         }
 787         else {
 788                 /*
 789                  * Since the arg parameter is defined as a long but may be
 790                  * either a long or a pointer we must take care to handle
 791                  * sign extension issues.  Our sys call munger will sign
 792                  * extend a long when we are called from a 32-bit process.
 793                  * Since we can never have an address greater than 32-bits
 794                  * from a 32-bit process we lop off the top 32-bits to avoid
 795                  * getting the wrong address
 796                  */
 797                 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
 798         }
 799
 800         pop = &fdp->fd_ofileflags[fd];
 801
 802 #if CONFIG_MACF
 803         error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
 804             uap->arg);
 805         if (error)
 806                 goto out;
 807 #endif
 808
 809         switch (uap->cmd) {
 810
 811         case F_DUPFD:
 812         case F_DUPFD_CLOEXEC:
 813                 if (FP_ISGUARDED(fp, GUARD_DUP)) {
 814                         error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
 815                         goto out;
 816                 }
 817                 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 818                 AUDIT_ARG(value32, newmin);
 819                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 820                     newmin >= maxfiles) {
 821                         error = EINVAL;
 822                         goto out;
 823                 }
 824                 if ( (error = fdalloc(p, newmin, &i)) )
 825                         goto out;
 826                 error = finishdup(p, fdp, fd, i,
 827                     uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
 828                 goto out;
 829
 830         case F_GETFD:
 831                 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
 832                 error = 0;
 833                 goto out;
 834
 835         case F_SETFD:
 836                 AUDIT_ARG(value32, uap->arg);
 837                 if (uap->arg & FD_CLOEXEC)
 838                         *pop |= UF_EXCLOSE;
 839                 else {
 840                         if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
 841                                 error = fp_guard_exception(p,
 842                                     fd, fp, kGUARD_EXC_NOCLOEXEC);
 843                                 goto out;
 844                         }
 845                         *pop &= ~UF_EXCLOSE;
 846                 }
 847                 error = 0;
 848                 goto out;
 849
 850         case F_GETFL:
 851                 *retval = OFLAGS(fp->f_flag);
 852                 error = 0;
 853                 goto out;
 854
 855         case F_SETFL:
 856                 fp->f_flag &= ~FCNTLFLAGS;
 857                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 858                 AUDIT_ARG(value32, tmp);
 859                 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
 860                 tmp = fp->f_flag & FNONBLOCK;
 861                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 862                 if (error)
 863                         goto out;
 864                 tmp = fp->f_flag & FASYNC;
 865                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
 866                 if (!error)
 867                         goto out;
 868                 fp->f_flag &= ~FNONBLOCK;
 869                 tmp = 0;
 870                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 871                 goto out;
 872
 873         case F_GETOWN:
 874                 if (fp->f_type == DTYPE_SOCKET) {
 875                         *retval = ((struct socket *)fp->f_data)->so_pgid;
 876                         error = 0;
 877                         goto out;
 878                 }
 879                 error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, &context);
 880                 *retval = -*retval;
 881                 goto out;
 882
 883         case F_SETOWN:
 884                 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
 885                 AUDIT_ARG(value32, tmp);
 886                 if (fp->f_type == DTYPE_SOCKET) {
 887                         ((struct socket *)fp->f_data)->so_pgid = tmp;
 888                         error =0;
 889                         goto out;
 890                 }
 891                 if (fp->f_type == DTYPE_PIPE) {
 892                         error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 893                         goto out;
 894                 }
 895
 896                 if (tmp <= 0) {
 897                         tmp = -tmp;
 898                 } else {
 899                         proc_t p1 = proc_find(tmp);
 900                         if (p1 == 0) {
 901                                 error = ESRCH;
 902                                 goto out;
 903                         }
 904                         tmp = (int)p1->p_pgrpid;
 905                         proc_rele(p1);
 906                 }
 907                 error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
 908                 goto out;
 909
 910         case F_SETNOSIGPIPE:
                     /*
                      * Set (arg != 0) or clear (arg == 0) the no-SIGPIPE
                      * attribute of the descriptor.  Sockets are handed to
                      * sock_setsockopt() as SO_NOSIGPIPE (EINVAL when built
                      * without SOCKETS); any other file type keeps the state
                      * as FG_NOSIGPIPE in the fileglob's fg_lflags, which is
                      * modified with fg_lock held.
                      */
 911                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
 912                 if (fp->f_type == DTYPE_SOCKET) {
 913 #if SOCKETS
 914                         error = sock_setsockopt((struct socket *)fp->f_data,
 915                             SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
 916 #else
 917                         error = EINVAL;
 918 #endif
 919                 } else {
 920                         struct fileglob *fg = fp->f_fglob;
 921
 922                         lck_mtx_lock_spin(&fg->fg_lock);
                             /*
                              * Clearing must mask with the complement: and-ing
                              * with FG_NOSIGPIPE itself would keep that bit and
                              * wipe every other flag in fg_lflags.
                              */
 923                         if (tmp)
 924                                 fg->fg_lflags |= FG_NOSIGPIPE;
 925                         else
 926                                 fg->fg_lflags &= ~FG_NOSIGPIPE;
 927                         lck_mtx_unlock(&fg->fg_lock);
 928                         error = 0;
 929                 }
 930                 goto out;
 930                 goto out;
 931
 932         case F_GETNOSIGPIPE:
 933                 if (fp->f_type == DTYPE_SOCKET) {
 934 #if SOCKETS
 935                         int retsize = sizeof (*retval);
 936                         error = sock_getsockopt((struct socket *)fp->f_data,
 937                             SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
 938 #else
 939                         error = EINVAL;
 940 #endif
 941                 } else {
 942                         *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
 943                                 1 : 0;
 944                         error = 0;
 945                 }
 946                 goto out;
 947
 948         case F_SETCONFINED:
 949                 /*
 950                  * If this is the only reference to this fglob in the process
 951                  * and it's already marked as close-on-fork then mark it as
 952                  * (immutably) "confined" i.e. any fd that points to it will
 953                  * forever be close-on-fork, and attempts to use an IPC
 954                  * mechanism to move the descriptor elsewhere will fail.
 955                  */
 956                 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
 957                         struct fileglob *fg = fp->f_fglob;
 958
 959                         lck_mtx_lock_spin(&fg->fg_lock);
 960                         if (fg->fg_lflags & FG_CONFINED)
 961                                 error = 0;
 962                         else if (1 != fg->fg_count)
 963                                 error = EAGAIN; /* go close the dup .. */
 964                         else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
 965                                 fg->fg_lflags |= FG_CONFINED;
 966                                 error = 0;
 967                         } else
 968                                 error = EBADF;  /* open without O_CLOFORK? */
 969                         lck_mtx_unlock(&fg->fg_lock);
 970                 } else {
 971                         /*
 972                          * Other subsystems may have built on the immutability
 973                          * of FG_CONFINED; clearing it may be tricky.
 974                          */
 975                         error = EPERM;          /* immutable */
 976                 }
 977                 goto out;
 978
 979         case F_GETCONFINED:
 980                 *retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? 1 : 0;
 981                 error = 0;
 982                 goto out;
 983
 984         case F_SETLKWTIMEOUT:
 985         case F_SETLKW:
 986         case F_OFD_SETLKWTIMEOUT:
 987         case F_OFD_SETLKW:
 988                 flg |= F_WAIT;
 989                 /* Fall into F_SETLK */
 990
 991         case F_SETLK:
 992         case F_OFD_SETLK:
 993                 if (fp->f_type != DTYPE_VNODE) {
 994                         error = EBADF;
 995                         goto out;
 996                 }
 997                 vp = (struct vnode *)fp->f_data;
 998
 999                 fflag = fp->f_flag;
1000                 offset = fp->f_offset;
1001                 proc_fdunlock(p);
1002
1003                 /* Copy in the lock structure */
1004                 if (F_SETLKWTIMEOUT == uap->cmd ||
1005                     F_OFD_SETLKWTIMEOUT == uap->cmd) {
1006                         error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1007                         if (error) {
1008                                 goto outdrop;
1009                         }
1010                         fl = fltimeout.fl;
1011                         timeout = &fltimeout.timeout;
1012                 } else {
1013                         error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1014                         if (error) {
1015                                 goto outdrop;
1016                         }
1017                 }
1018
1019                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1020                 /* and ending byte for EOVERFLOW in SEEK_SET */
1021                 error = check_file_seek_range(&fl, offset);
1022                 if (error) {
1023                         goto outdrop;
1024                 }
1025
1026                 if ( (error = vnode_getwithref(vp)) ) {
1027                         goto outdrop;
1028                 }
1029                 if (fl.l_whence == SEEK_CUR)
1030                         fl.l_start += offset;
1031
1032 #if CONFIG_MACF
1033                 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1034                     F_SETLK, &fl);
1035                 if (error) {
1036                         (void)vnode_put(vp);
1037                         goto outdrop;
1038                 }
1039 #endif
1040                 switch (uap->cmd) {
1041                 case F_OFD_SETLK:
1042                 case F_OFD_SETLKW:
1043                 case F_OFD_SETLKWTIMEOUT:
1044                         flg |= F_OFD_LOCK;
1045                         switch (fl.l_type) {
1046                         case F_RDLCK:
1047                                 if ((fflag & FREAD) == 0) {
1048                                         error = EBADF;
1049                                         break;
1050                                 }
1051                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1052                                     F_SETLK, &fl, flg, &context, timeout);
1053                                 break;
1054                         case F_WRLCK:
1055                                 if ((fflag & FWRITE) == 0) {
1056                                         error = EBADF;
1057                                         break;
1058                                 }
1059                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1060                                     F_SETLK, &fl, flg, &context, timeout);
1061                                 break;
1062                         case F_UNLCK:
1063                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1064                                     F_UNLCK, &fl, F_OFD_LOCK, &context,
1065                                     timeout);
1066                                 break;
1067                         default:
1068                                 error = EINVAL;
1069                                 break;
1070                         }
1071                         if (0 == error &&
1072                             (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1073                                 struct fileglob *fg = fp->f_fglob;
1074
1075                                 /*
1076                                  * arrange F_UNLCK on last close (once
1077                                  * set, FG_HAS_OFDLOCK is immutable)
1078                                  */
1079                                 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1080                                         lck_mtx_lock_spin(&fg->fg_lock);
1081                                         fg->fg_lflags |= FG_HAS_OFDLOCK;
1082                                         lck_mtx_unlock(&fg->fg_lock);
1083                                 }
1084                         }
1085                         break;
1086                 default:
1087                         flg |= F_POSIX;
1088                         switch (fl.l_type) {
1089                         case F_RDLCK:
1090                                 if ((fflag & FREAD) == 0) {
1091                                         error = EBADF;
1092                                         break;
1093                                 }
1094                                 // XXX UInt32 unsafe for LP64 kernel
1095                                 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1096                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1097                                     F_SETLK, &fl, flg, &context, timeout);
1098                                 break;
1099                         case F_WRLCK:
1100                                 if ((fflag & FWRITE) == 0) {
1101                                         error = EBADF;
1102                                         break;
1103                                 }
1104                                 // XXX UInt32 unsafe for LP64 kernel
1105                                 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1106                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1107                                     F_SETLK, &fl, flg, &context, timeout);
1108                                 break;
1109                         case F_UNLCK:
1110                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1111                                     F_UNLCK, &fl, F_POSIX, &context, timeout);
1112                                 break;
1113                         default:
1114                                 error = EINVAL;
1115                                 break;
1116                         }
1117                         break;
1118                 }
1119                 (void) vnode_put(vp);
1120                 goto outdrop;
1121
1122         case F_GETLK:
1123         case F_OFD_GETLK:
1124                 if (fp->f_type != DTYPE_VNODE) {
1125                         error = EBADF;
1126                         goto out;
1127                 }
1128                 vp = (struct vnode *)fp->f_data;
1129
1130                 offset = fp->f_offset;
1131                 proc_fdunlock(p);
1132
1133                 /* Copy in the lock structure */
1134                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1135                 if (error)
1136                         goto outdrop;
1137
1138                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1139                 /* and ending byte for EOVERFLOW in SEEK_SET */
1140                 error = check_file_seek_range(&fl, offset);
1141                 if (error) {
1142                         goto outdrop;
1143                 }
1144
1145                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1146                         error = EINVAL;
1147                         goto outdrop;
1148                 }
1149
1150                 switch (fl.l_type) {
1151                 case F_RDLCK:
1152                 case F_UNLCK:
1153                 case F_WRLCK:
1154                         break;
1155                 default:
1156                         error = EINVAL;
1157                         goto outdrop;
1158                 }
1159
1160                 switch (fl.l_whence) {
1161                 case SEEK_CUR:
1162                 case SEEK_SET:
1163                 case SEEK_END:
1164                         break;
1165                 default:
1166                         error = EINVAL;
1167                         goto outdrop;
1168                 }
1169
1170                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1171                         if (fl.l_whence == SEEK_CUR)
1172                                 fl.l_start += offset;
1173
1174 #if CONFIG_MACF
1175                         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1176                             uap->cmd, &fl);
1177                         if (error == 0)
1178 #endif
1179                         switch (uap->cmd) {
1180                         case F_OFD_GETLK:
1181                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1182                                     F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1183                                 break;
1184                         case F_OFD_GETLKPID:
1185                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1186                                     F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1187                                 break;
1188                         default:
1189                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1190                                     uap->cmd, &fl, F_POSIX, &context, NULL);
1191                                 break;
1192                         }
1193
1194                         (void)vnode_put(vp);
1195
1196                         if (error == 0)
1197                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1198                 }
1199                 goto outdrop;
1200
1201         case F_PREALLOCATE: {
1202                 fstore_t alloc_struct;    /* structure for allocate command */
1203                 u_int32_t alloc_flags = 0;
1204
1205                 if (fp->f_type != DTYPE_VNODE) {
1206                         error = EBADF;
1207                         goto out;
1208                 }
1209
1210                 vp = (struct vnode *)fp->f_data;
1211                 proc_fdunlock(p);
1212
1213                 /* make sure that we have write permission */
1214                 if ((fp->f_flag & FWRITE) == 0) {
1215                         error = EBADF;
1216                         goto outdrop;
1217                 }
1218
1219                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1220                 if (error)
1221                         goto outdrop;
1222
1223                 /* now set the space allocated to 0 */
1224                 alloc_struct.fst_bytesalloc = 0;
1225
1226                 /*
1227                  * Do some simple parameter checking
1228                  */
1229
1230                 /* set up the flags */
1231
1232                 alloc_flags |= PREALLOCATE;
1233
1234                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG)
1235                         alloc_flags |= ALLOCATECONTIG;
1236
1237                 if (alloc_struct.fst_flags & F_ALLOCATEALL)
1238                         alloc_flags |= ALLOCATEALL;
1239
1240                 /*
1241                  * Do any position mode specific stuff.  The supported
1242                  * position modes are F_PEOFPOSMODE and F_VOLPOSMODE.
1243                  */
1244
1245                 switch (alloc_struct.fst_posmode) {
1246
1247                 case F_PEOFPOSMODE:
1248                         if (alloc_struct.fst_offset != 0) {
1249                                 error = EINVAL;
1250                                 goto outdrop;
1251                         }
1252
1253                         alloc_flags |= ALLOCATEFROMPEOF;
1254                         break;
1255
1256                 case F_VOLPOSMODE:
1257                         if (alloc_struct.fst_offset <= 0) {
1258                                 error = EINVAL;
1259                                 goto outdrop;
1260                         }
1261
1262                         alloc_flags |= ALLOCATEFROMVOL;
1263                         break;
1264
1265                 default: {
1266                         error = EINVAL;
1267                         goto outdrop;
1268                         }
1269                 }
1270                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1271                         /*
1272                          * call allocate to get the space
1273                          */
1274                         error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags,
1275                                               &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1276                                               &context);
1277                         (void)vnode_put(vp);
1278
1279                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1280
1281                         if (error == 0)
1282                                 error = error2;
1283                 }
1284                 goto outdrop;
1285
1286                 }
1287         case F_SETSIZE:
1288                 if (fp->f_type != DTYPE_VNODE) {
1289                         error = EBADF;
1290                         goto out;
1291                 }
1292                 vp = (struct vnode *)fp->f_data;
1293                 proc_fdunlock(p);
1294
1295                 error = copyin(argp, (caddr_t)&offset, sizeof (off_t));
1296                 if (error)
1297                         goto outdrop;
1298                 AUDIT_ARG(value64, offset);
1299
1300                 error = vnode_getwithref(vp);
1301                 if (error)
1302                         goto outdrop;
1303
1304 #if CONFIG_MACF
1305                 error = mac_vnode_check_truncate(&context,
1306                     fp->f_fglob->fg_cred, vp);
1307                 if (error) {
1308                         (void)vnode_put(vp);
1309                         goto outdrop;
1310                 }
1311 #endif
1312                 /*
1313                  * Make sure that we are root.  Growing a file
1314                  * without zero filling the data is a security hole
1315                  * root would have access anyway so we'll allow it
1316                  */
1317                 if (!kauth_cred_issuser(kauth_cred_get())) {
1318                         error = EACCES;
1319                 } else {
1320                         /*
1321                          * set the file size
1322                          */
1323                         error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1324                             &context);
1325                 }
1326
1327                 (void)vnode_put(vp);
1328                 goto outdrop;
1329
1330         case F_RDAHEAD:
1331                 if (fp->f_type != DTYPE_VNODE) {
1332                         error = EBADF;
1333                         goto out;
1334                 }
1335                 if (uap->arg)
1336                         fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1337                 else
1338                         fp->f_fglob->fg_flag |= FNORDAHEAD;
1339
1340                 goto out;
1341
1342         case F_NOCACHE:
1343                 if (fp->f_type != DTYPE_VNODE) {
1344                         error = EBADF;
1345                         goto out;
1346                 }
1347                 if (uap->arg)
1348                         fp->f_fglob->fg_flag |= FNOCACHE;
1349                 else
1350                         fp->f_fglob->fg_flag &= ~FNOCACHE;
1351
1352                 goto out;
1353
1354         case F_NODIRECT:
1355                 if (fp->f_type != DTYPE_VNODE) {
1356                         error = EBADF;
1357                         goto out;
1358                 }
1359                 if (uap->arg)
1360                         fp->f_fglob->fg_flag |= FNODIRECT;
1361                 else
1362                         fp->f_fglob->fg_flag &= ~FNODIRECT;
1363
1364                 goto out;
1365
1366         case F_SINGLE_WRITER:
1367                 if (fp->f_type != DTYPE_VNODE) {
1368                         error = EBADF;
1369                         goto out;
1370                 }
1371                 if (uap->arg)
1372                         fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1373                 else
1374                         fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1375
1376                 goto out;
1377
1378         case F_GLOBAL_NOCACHE:
1379                 if (fp->f_type != DTYPE_VNODE) {
1380                         error = EBADF;
1381                         goto out;
1382                 }
1383                 vp = (struct vnode *)fp->f_data;
1384                 proc_fdunlock(p);
1385
1386                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1387
1388                         *retval = vnode_isnocache(vp);
1389
1390                         if (uap->arg)
1391                                 vnode_setnocache(vp);
1392                         else
1393                                 vnode_clearnocache(vp);
1394
1395                         (void)vnode_put(vp);
1396                 }
1397                 goto outdrop;
1398
1399         case F_CHECK_OPENEVT:
1400                 if (fp->f_type != DTYPE_VNODE) {
1401                         error = EBADF;
1402                         goto out;
1403                 }
1404                 vp = (struct vnode *)fp->f_data;
1405                 proc_fdunlock(p);
1406
1407                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1408
1409                         *retval = vnode_is_openevt(vp);
1410
1411                         if (uap->arg)
1412                                 vnode_set_openevt(vp);
1413                         else
1414                                 vnode_clear_openevt(vp);
1415
1416                         (void)vnode_put(vp);
1417                 }
1418                 goto outdrop;
1419
1420         case F_RDADVISE: {
1421                 struct radvisory ra_struct;
1422
1423                 if (fp->f_type != DTYPE_VNODE) {
1424                         error = EBADF;
1425                         goto out;
1426                 }
1427                 vp = (struct vnode *)fp->f_data;
1428                 proc_fdunlock(p);
1429
1430                 if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct))) )
1431                         goto outdrop;
1432                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1433                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1434
1435                         (void)vnode_put(vp);
1436                 }
1437                 goto outdrop;
1438                 }
1439
1440         case F_FLUSH_DATA:
1441
1442                 if (fp->f_type != DTYPE_VNODE) {
1443                         error = EBADF;
1444                         goto out;
1445                 }
1446                 vp = (struct vnode *)fp->f_data;
1447                 proc_fdunlock(p);
1448
1449                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1450                         error = cluster_push(vp, 0);
1451
1452                         (void)vnode_put(vp);
1453                 }
1454                 goto outdrop;
1455
1456         case F_LOG2PHYS:
1457         case F_LOG2PHYS_EXT: {
1458                 struct log2phys l2p_struct;    /* structure for allocate command */
1459                 int devBlockSize;
1460
1461                 off_t file_offset = 0;
1462                 size_t a_size = 0;
1463                 size_t run = 0;
1464
1465                 if (uap->cmd == F_LOG2PHYS_EXT) {
1466                         error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1467                         if (error)
1468                                 goto out;
1469                         file_offset = l2p_struct.l2p_devoffset;
1470                 } else {
1471                         file_offset = fp->f_offset;
1472                 }
1473                 if (fp->f_type != DTYPE_VNODE) {
1474                         error = EBADF;
1475                         goto out;
1476                 }
1477                 vp = (struct vnode *)fp->f_data;
1478                 proc_fdunlock(p);
1479                 if ( (error = vnode_getwithref(vp)) ) {
1480                         goto outdrop;
1481                 }
1482                 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1483                 if (error) {
1484                         (void)vnode_put(vp);
1485                         goto outdrop;
1486                 }
1487                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1488                 if (error) {
1489                         (void)vnode_put(vp);
1490                         goto outdrop;
1491                 }
1492                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1493                 if (uap->cmd == F_LOG2PHYS_EXT) {
1494                         if (l2p_struct.l2p_contigbytes < 0) {
1495                                 vnode_put(vp);
1496                                 error = EINVAL;
1497                                 goto outdrop;
1498                         }
1499
1500                         a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1501                 } else {
1502                         a_size = devBlockSize;
1503                 }
1504
1505                 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1506
1507                 (void)vnode_put(vp);
1508
1509                 if (!error) {
1510                         l2p_struct.l2p_flags = 0;       /* for now */
1511                         if (uap->cmd == F_LOG2PHYS_EXT) {
1512                                 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1513                         } else {
1514                                 l2p_struct.l2p_contigbytes = 0; /* for now */
1515                         }
1516
1517                         /*
1518                          * The block number being -1 suggests that the file offset is not backed
1519                          * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
1520                          */
1521                         if (bn == -1) {
1522                                 /* Don't multiply it by the block size */
1523                                 l2p_struct.l2p_devoffset = bn;
1524                         }
1525                         else {
1526                                 l2p_struct.l2p_devoffset = bn * devBlockSize;
1527                                 l2p_struct.l2p_devoffset += file_offset - offset;
1528                         }
1529                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1530                 }
1531                 goto outdrop;
1532                 }
1533         case F_GETPATH: {
1534                 char *pathbufp;
1535                 int pathlen;
1536
1537                 if (fp->f_type != DTYPE_VNODE) {
1538                         error = EBADF;
1539                         goto out;
1540                 }
1541                 vp = (struct vnode *)fp->f_data;
1542                 proc_fdunlock(p);
1543
1544                 pathlen = MAXPATHLEN;
1545                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1546                 if (pathbufp == NULL) {
1547                         error = ENOMEM;
1548                         goto outdrop;
1549                 }
1550                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1551                         error = vn_getpath(vp, pathbufp, &pathlen);
1552                         (void)vnode_put(vp);
1553
1554                         if (error == 0)
1555                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
1556                 }
1557                 FREE(pathbufp, M_TEMP);
1558                 goto outdrop;
1559         }
1560
1561         case F_PATHPKG_CHECK: {
                     /*
                      * Copy a NUL-terminated path (at most MAXPATHLEN bytes)
                      * in from the user address in argp and pass it, together
                      * with the descriptor's vnode, to vn_path_package_check(),
                      * which reports its result through retval.  Only valid
                      * on vnode-backed descriptors (EBADF otherwise).
                      */
1562                 char *pathbufp;
1563                 size_t pathlen;
1564
1565                 if (fp->f_type != DTYPE_VNODE) {
1566                         error = EBADF;
1567                         goto out;
1568                 }
1569                 vp = (struct vnode *)fp->f_data;
1570                 proc_fdunlock(p);
1571
1572                 pathlen = MAXPATHLEN;
1573                 pathbufp = kalloc(MAXPATHLEN);
                     /* Never hand a failed allocation to copyinstr()/kfree(). */
                     if (pathbufp == NULL) {
                             error = ENOMEM;
                             goto outdrop;
                     }
1574
1575                 if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) {
1576                         if ( (error = vnode_getwithref(vp)) == 0 ) {
1577                                 AUDIT_ARG(text, pathbufp);
1578                                 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1579
1580                                 (void)vnode_put(vp);
1581                         }
1582                 }
1583                 kfree(pathbufp, MAXPATHLEN);
1584                 goto outdrop;
1585         }
1586
1587         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
1588         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
1589         case F_BARRIERFSYNC:  // fsync + barrier
1590         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
1591         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
1592                 if (fp->f_type != DTYPE_VNODE) {
1593                         error = EBADF;
1594                         goto out;
1595                 }
1596                 vp = (struct vnode *)fp->f_data;
1597                 proc_fdunlock(p);
1598
1599                 if ( (error = vnode_getwithref(vp)) == 0 ) {
1600                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1601
1602                         (void)vnode_put(vp);
1603                 }
1604                 break;
1605         }
1606
1607         /*
1608          * SPI (private) for opening a file starting from a dir fd
1609          */
1610         case F_OPENFROM: {
1611                 struct user_fopenfrom fopen;
1612                 struct vnode_attr va;
1613                 struct nameidata nd;
1614                 int cmode;
1615
1616                 /* Check if this isn't a valid file descriptor */
1617                 if ((fp->f_type != DTYPE_VNODE) ||
1618                     (fp->f_flag & FREAD) == 0) {
1619                         error = EBADF;
1620                         goto out;
1621                 }
1622                 vp = (struct vnode *)fp->f_data;
1623                 proc_fdunlock(p);
1624
1625                 if (vnode_getwithref(vp)) {
1626                         error = ENOENT;
1627                         goto outdrop;
1628                 }
1629
1630                 /* Only valid for directories */
1631                 if (vp->v_type != VDIR) {
1632                         vnode_put(vp);
1633                         error = ENOTDIR;
1634                         goto outdrop;
1635                 }
1636
1637                 /* Get flags, mode and pathname arguments. */
1638                 if (IS_64BIT_PROCESS(p)) {
1639                         error = copyin(argp, &fopen, sizeof(fopen));
1640                 } else {
1641                         struct user32_fopenfrom fopen32;
1642
1643                         error = copyin(argp, &fopen32, sizeof(fopen32));
1644                         fopen.o_flags = fopen32.o_flags;
1645                         fopen.o_mode = fopen32.o_mode;
1646                         fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1647                 }
1648                 if (error) {
1649                         vnode_put(vp);
1650                         goto outdrop;
1651                 }
1652                 AUDIT_ARG(fflags, fopen.o_flags);
1653                 AUDIT_ARG(mode, fopen.o_mode);
1654                 VATTR_INIT(&va);
1655                 /* Mask off all but regular access permissions */
1656                 cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1657                 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1658
1659                 /* Start the lookup relative to the file descriptor's vnode. */
1660                 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1661                        fopen.o_pathname, &context);
1662                 nd.ni_dvp = vp;
1663
1664                 error = open1(&context, &nd, fopen.o_flags, &va,
1665                               fileproc_alloc_init, NULL, retval);
1666
1667                 vnode_put(vp);
1668                 break;
1669         }
1670         /*
1671          * SPI (private) for unlinking a file starting from a dir fd
1672          */
1673         case F_UNLINKFROM: {
1674                 user_addr_t pathname;
1675
1676                 /* Check if this isn't a valid file descriptor */
1677                 if ((fp->f_type != DTYPE_VNODE) ||
1678                     (fp->f_flag & FREAD) == 0) {
1679                         error = EBADF;
1680                         goto out;
1681                 }
1682                 vp = (struct vnode *)fp->f_data;
1683                 proc_fdunlock(p);
1684
1685                 if (vnode_getwithref(vp)) {
1686                         error = ENOENT;
1687                         goto outdrop;
1688                 }
1689
1690                 /* Only valid for directories */
1691                 if (vp->v_type != VDIR) {
1692                         vnode_put(vp);
1693                         error = ENOTDIR;
1694                         goto outdrop;
1695                 }
1696
1697                 /* Get the pathname argument. */
1698                 if (IS_64BIT_PROCESS(p)) {
1699                         pathname = (user_addr_t)argp;
1700                 } else {
1701                         pathname = CAST_USER_ADDR_T(argp);
1702                 }
1703
1704                 /* Start the lookup relative to the file descriptor's vnode. */
1705                 error = unlink1(&context, vp, pathname, UIO_USERSPACE, 0);
1706
1707                 vnode_put(vp);
1708                 break;
1709
1710         }
1711
1712         case F_ADDSIGS:
1713         case F_ADDFILESIGS:
1714         case F_ADDFILESIGS_FOR_DYLD_SIM:
1715         case F_ADDFILESIGS_RETURN:
1716         {
1717                 struct cs_blob *blob = NULL;
1718                 struct user_fsignatures fs;
1719                 kern_return_t kr;
1720                 vm_offset_t kernel_blob_addr;
1721                 vm_size_t kernel_blob_size;
1722                 int blob_add_flags = 0;
1723
1724                 if (fp->f_type != DTYPE_VNODE) {
1725                         error = EBADF;
1726                         goto out;
1727                 }
1728                 vp = (struct vnode *)fp->f_data;
1729                 proc_fdunlock(p);
1730
1731                 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1732                         blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
1733                         if ((p->p_csflags & CS_KILL) == 0) {
1734                                 proc_lock(p);
1735                                 p->p_csflags |= CS_KILL;
1736                                 proc_unlock(p);
1737                         }
1738                 }
1739
1740                 error = vnode_getwithref(vp);
1741                 if (error)
1742                         goto outdrop;
1743
1744                 if (IS_64BIT_PROCESS(p)) {
1745                         error = copyin(argp, &fs, sizeof (fs));
1746                 } else {
1747                         struct user32_fsignatures fs32;
1748
1749                         error = copyin(argp, &fs32, sizeof (fs32));
1750                         fs.fs_file_start = fs32.fs_file_start;
1751                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1752                         fs.fs_blob_size = fs32.fs_blob_size;
1753                 }
1754
1755                 if (error) {
1756                         vnode_put(vp);
1757                         goto outdrop;
1758                 }
1759
1760                 /*
1761                  * First check if we have something loaded at this offset
1762                  */
1763                 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
1764                 if (blob != NULL)
1765                 {
1766                         /* If this is for dyld_sim revalidate the blob */
1767                         if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1768                                 error = ubc_cs_blob_revalidate(vp, blob, blob_add_flags);
1769                         }
1770
1771                 } else {
1772                         /*
1773                          * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
1774                          * our use cases for the immediate future, but note that at the time of this commit, some
1775                          * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
1776                          *
1777                          * We should consider how we can manage this more effectively; the above means that some
1778                          * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
1779                          * threshold considered ridiculous at the time of this change.
1780                          */
1781 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
1782                         if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1783                                 error = E2BIG;
1784                                 vnode_put(vp);
1785                                 goto outdrop;
1786                         }
1787
1788                         kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1789                         kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1790                         if (kr != KERN_SUCCESS) {
1791                                 error = ENOMEM;
1792                                 vnode_put(vp);
1793                                 goto outdrop;
1794                         }
1795
1796                         if(uap->cmd == F_ADDSIGS) {
1797                                 error = copyin(fs.fs_blob_start,
1798                                                (void *) kernel_blob_addr,
1799                                                kernel_blob_size);
1800                         } else /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */ {
1801                                 int resid;
1802
1803                                 error = vn_rdwr(UIO_READ,
1804                                                 vp,
1805                                                 (caddr_t) kernel_blob_addr,
1806                                                 kernel_blob_size,
1807                                                 fs.fs_file_start + fs.fs_blob_start,
1808                                                 UIO_SYSSPACE,
1809                                                 0,
1810                                                 kauth_cred_get(),
1811                                                 &resid,
1812                                                 p);
1813                                 if ((error == 0) && resid) {
1814                                         /* kernel_blob_size rounded to a page size, but signature may be at end of file */
1815                                         memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
1816                                 }
1817                         }
1818
1819                         if (error) {
1820                                 ubc_cs_blob_deallocate(kernel_blob_addr,
1821                                                        kernel_blob_size);
1822                                 vnode_put(vp);
1823                                 goto outdrop;
1824                         }
1825
1826                         blob = NULL;
1827                         error = ubc_cs_blob_add(vp,
1828                                                 CPU_TYPE_ANY,   /* not for a specific architecture */
1829                                                 fs.fs_file_start,
1830                                                 kernel_blob_addr,
1831                                                 kernel_blob_size,
1832                                                 blob_add_flags,
1833                                                 &blob);
1834                         if (error) {
1835                                 ubc_cs_blob_deallocate(kernel_blob_addr,
1836                                                        kernel_blob_size);
1837                         } else {
1838                                 /* ubc_cs_blob_add() has consumed "kernel_blob_addr" */
1839 #if CHECK_CS_VALIDATION_BITMAP
1840                                 ubc_cs_validation_bitmap_allocate( vp );
1841 #endif
1842                         }
1843                 }
1844
1845                 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1846                         /*
1847                          * The first element of the structure is an
1848                          * off_t that happens to have the same size on
1849                          * all archs. Let's overwrite that.
1850                          */
1851                         off_t end_offset = 0;
1852                         if (blob)
1853                                 end_offset = blob->csb_end_offset;
1854                         error = copyout(&end_offset, argp, sizeof (end_offset));
1855                 }
1856
1857                 (void) vnode_put(vp);
1858                 break;
1859         }
1860         case F_FINDSIGS: {
1861                 error = ENOTSUP;
1862                 goto out;
1863         }
1864 #if CONFIG_PROTECT
1865         case F_GETPROTECTIONCLASS: {
1866                 int class = 0;
1867
1868                 if (fp->f_type != DTYPE_VNODE) {
1869                         error = EBADF;
1870                         goto out;
1871                 }
1872                 vp = (struct vnode *)fp->f_data;
1873
1874                 proc_fdunlock(p);
1875
1876                 if (vnode_getwithref(vp)) {
1877                         error = ENOENT;
1878                         goto outdrop;
1879                 }
1880
1881                 error = cp_vnode_getclass (vp, &class);
1882                 if (error == 0) {
1883                         *retval = class;
1884                 }
1885
1886                 vnode_put(vp);
1887                 break;
1888         }
1889
1890         case F_SETPROTECTIONCLASS: {
1891                 /* tmp must be a valid PROTECTION_CLASS_* */
1892                 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
1893
1894                 if (fp->f_type != DTYPE_VNODE) {
1895                         error = EBADF;
1896                         goto out;
1897                 }
1898                 vp = (struct vnode *)fp->f_data;
1899
1900                 proc_fdunlock(p);
1901
1902                 if (vnode_getwithref(vp)) {
1903                         error = ENOENT;
1904                         goto outdrop;
1905                 }
1906
1907                 /* Only go forward if you have write access */
1908                 vfs_context_t ctx = vfs_context_current();
1909                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
1910                         vnode_put(vp);
1911                         error = EBADF;
1912                         goto outdrop;
1913                 }
1914                 error = cp_vnode_setclass (vp, tmp);
1915                 vnode_put(vp);
1916                 break;
1917         }
1918
1919         case F_TRANSCODEKEY: {
1920
1921                 char *backup_keyp = NULL;
1922                 unsigned backup_key_len = CP_MAX_WRAPPEDKEYSIZE;
1923
1924                 if (fp->f_type != DTYPE_VNODE) {
1925                         error = EBADF;
1926                         goto out;
1927                 }
1928
1929                 vp = (struct vnode *)fp->f_data;
1930                 proc_fdunlock(p);
1931
1932                 if (vnode_getwithref(vp)) {
1933                         error = ENOENT;
1934                         goto outdrop;
1935                 }
1936
1937                 MALLOC(backup_keyp, char *, backup_key_len, M_TEMP, M_WAITOK);
1938                 if (backup_keyp == NULL) {
1939                         error = ENOMEM;
1940                         goto outdrop;
1941                 }
1942
1943                 error = cp_vnode_transcode (vp, backup_keyp, &backup_key_len);
1944                 vnode_put(vp);
1945
1946                 if (error == 0) {
1947                         error = copyout((caddr_t)backup_keyp, argp, backup_key_len);
1948                         *retval = backup_key_len;
1949                 }
1950
1951                 FREE(backup_keyp, M_TEMP);
1952
1953                 break;
1954         }
1955
1956         case F_GETPROTECTIONLEVEL:  {
1957                 uint32_t cp_version = 0;
1958
1959                 if (fp->f_type != DTYPE_VNODE) {
1960                         error = EBADF;
1961                         goto out;
1962                 }
1963
1964                 vp = (struct vnode*) fp->f_data;
1965                 proc_fdunlock (p);
1966
1967                 if (vnode_getwithref(vp)) {
1968                         error = ENOENT;
1969                         goto outdrop;
1970                 }
1971
1972                 /*
1973                  * if cp_get_root_major_vers fails, error will be set to proper errno
1974                  * and cp_version will still be 0.
1975                  */
1976
1977                 error = cp_get_root_major_vers (vp, &cp_version);
1978                 *retval = cp_version;
1979
1980                 vnode_put (vp);
1981                 break;
1982         }
1983
1984         case F_GETDEFAULTPROTLEVEL:  {
1985                 uint32_t cp_default = 0;
1986
1987                 if (fp->f_type != DTYPE_VNODE) {
1988                         error = EBADF;
1989                         goto out;
1990                 }
1991
1992                 vp = (struct vnode*) fp->f_data;
1993                 proc_fdunlock (p);
1994
1995                 if (vnode_getwithref(vp)) {
1996                         error = ENOENT;
1997                         goto outdrop;
1998                 }
1999
2000                 /*
2001                  * if cp_get_default_level fails, error will be set to proper errno
2002                  * and cp_default will still be 0.
2003                  */
2004
2005                 error = cp_get_default_level(vp, &cp_default);
2006                 *retval = cp_default;
2007
2008                 vnode_put (vp);
2009                 break;
2010         }
2011
2012
2013 #endif /* CONFIG_PROTECT */
2014
2015         case F_MOVEDATAEXTENTS: {
2016                 struct fileproc *fp2 = NULL;
2017                 struct vnode *src_vp = NULLVP;
2018                 struct vnode *dst_vp = NULLVP;
2019                 /* We need to grab the 2nd FD out of the arguments before moving on. */
2020                 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2021
2022                 if (fp->f_type != DTYPE_VNODE) {
2023                         error = EBADF;
2024                         goto out;
2025                 }
2026
2027                 /* For now, special case HFS+ only, since this is SPI. */
2028                 src_vp = (struct vnode *)fp->f_data;
2029                 if (src_vp->v_tag != VT_HFS) {
2030                         error = EINVAL;
2031                         goto out;
2032                 }
2033
2034                 /*
2035                  * Get the references before we start acquiring iocounts on the vnodes,
2036                  * while we still hold the proc fd lock
2037                  */
2038                 if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
2039                         error = EBADF;
2040                         goto out;
2041                 }
2042                 if (fp2->f_type != DTYPE_VNODE) {
2043                         fp_drop(p, fd2, fp2, 1);
2044                         error = EBADF;
2045                         goto out;
2046                 }
2047                 dst_vp = (struct vnode *)fp2->f_data;
2048                 if (dst_vp->v_tag != VT_HFS) {
2049                         fp_drop(p, fd2, fp2, 1);
2050                         error = EINVAL;
2051                         goto out;
2052                 }
2053
2054 #if CONFIG_MACF
2055                 /* Re-do MAC checks against the new FD, pass in a fake argument */
2056                 error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
2057                 if (error) {
2058                         fp_drop(p, fd2, fp2, 1);
2059                         goto out;
2060                 }
2061 #endif
2062                 /* Audit the 2nd FD */
2063                 AUDIT_ARG(fd, fd2);
2064
2065                 proc_fdunlock(p);
2066
2067                 if (vnode_getwithref(src_vp)) {
2068                         fp_drop(p, fd2, fp2, 0);
2069                         error = ENOENT;
2070                         goto outdrop;
2071                 }
2072                 if (vnode_getwithref(dst_vp)) {
2073                         vnode_put (src_vp);
2074                         fp_drop(p, fd2, fp2, 0);
2075                         error = ENOENT;
2076                         goto outdrop;
2077                 }
2078
2079                 /*
2080                  * Basic asserts; validate they are not the same and that
2081                  * both live on the same filesystem.
2082                  */
2083                 if (dst_vp == src_vp) {
2084                         vnode_put (src_vp);
2085                         vnode_put (dst_vp);
2086                         fp_drop (p, fd2, fp2, 0);
2087                         error = EINVAL;
2088                         goto outdrop;
2089                 }
2090
2091                 if (dst_vp->v_mount != src_vp->v_mount) {
2092                         vnode_put (src_vp);
2093                         vnode_put (dst_vp);
2094                         fp_drop (p, fd2, fp2, 0);
2095                         error = EXDEV;
2096                         goto outdrop;
2097                 }
2098
2099                 /* Now we have a legit pair of FDs.  Go to work */
2100
2101                 /* Now check for write access to the target files */
2102                 if(vnode_authorize(src_vp, NULLVP,
2103                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2104                         vnode_put(src_vp);
2105                         vnode_put(dst_vp);
2106                         fp_drop(p, fd2, fp2, 0);
2107                         error = EBADF;
2108                         goto outdrop;
2109                 }
2110
2111                 if(vnode_authorize(dst_vp, NULLVP,
2112                                                    (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2113                         vnode_put(src_vp);
2114                         vnode_put(dst_vp);
2115                         fp_drop(p, fd2, fp2, 0);
2116                         error = EBADF;
2117                         goto outdrop;
2118                 }
2119
2120                 /* Verify that both vps point to files and not directories */
2121                 if ( !vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2122                         error = EINVAL;
2123                         vnode_put (src_vp);
2124                         vnode_put (dst_vp);
2125                         fp_drop (p, fd2, fp2, 0);
2126                         goto outdrop;
2127                 }
2128
2129                 /*
2130                  * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2131                  * We'll pass in our special bit indicating that the new behavior is expected
2132                  */
2133
2134                 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2135
2136                 vnode_put (src_vp);
2137                 vnode_put (dst_vp);
2138                 fp_drop(p, fd2, fp2, 0);
2139                 break;
2140         }
2141
2142         /*
2143          * SPI for making a file compressed.
2144          */
2145         case F_MAKECOMPRESSED: {
2146                 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2147
2148                 if (fp->f_type != DTYPE_VNODE) {
2149                         error = EBADF;
2150                         goto out;
2151                 }
2152
2153                 vp = (struct vnode*) fp->f_data;
2154                 proc_fdunlock (p);
2155
2156                 /* get the vnode */
2157                 if (vnode_getwithref(vp)) {
2158                         error = ENOENT;
2159                         goto outdrop;
2160                 }
2161
2162                 /* Is it a file? */
2163                 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2164                         vnode_put(vp);
2165                         error = EBADF;
2166                         goto outdrop;
2167                 }
2168
2169                 /* invoke ioctl to pass off to FS */
2170                 /* Only go forward if you have write access */
2171                 vfs_context_t ctx = vfs_context_current();
2172                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2173                         vnode_put(vp);
2174                         error = EBADF;
2175                         goto outdrop;
2176                 }
2177
2178                 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2179
2180                 vnode_put (vp);
2181                 break;
2182         }
2183
2184         /*
2185          * SPI (private) for indicating to a filesystem that subsequent writes to
2186          * the open FD will be written to the Fastflow.
2187          */
2188         case F_SET_GREEDY_MODE:
2189                 /* intentionally drop through to the same handler as F_SETSTATICCONTENT.
2190                  * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2191                  */
2192
2193         /*
2194          * SPI (private) for indicating to a filesystem that subsequent writes to
2195          * the open FD will represent static content.
2196          */
2197         case F_SETSTATICCONTENT: {
2198                 caddr_t ioctl_arg = NULL;
2199
2200                 if (uap->arg) {
2201                         ioctl_arg = (caddr_t) 1;
2202                 }
2203
2204                 if (fp->f_type != DTYPE_VNODE) {
2205                         error = EBADF;
2206                         goto out;
2207                 }
2208                 vp = (struct vnode *)fp->f_data;
2209                 proc_fdunlock(p);
2210
2211                 error = vnode_getwithref(vp);
2212                 if (error) {
2213                         error = ENOENT;
2214                         goto outdrop;
2215                 }
2216
2217                 /* Only go forward if you have write access */
2218                 vfs_context_t ctx = vfs_context_current();
2219                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2220                         vnode_put(vp);
2221                         error = EBADF;
2222                         goto outdrop;
2223                 }
2224
2225                 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2226                 (void)vnode_put(vp);
2227
2228                 break;
2229         }
2230
2231         /*
2232          * SPI (private) for indicating to the lower level storage driver that the
2233          * subsequent writes should be of a particular IO type (burst, greedy, static),
2234          * or other flavors that may be necessary.
2235          */
2236         case F_SETIOTYPE: {
2237                 caddr_t param_ptr;
2238                 uint32_t param;
2239
2240                 if (uap->arg) {
2241                         /* extract 32 bits of flags from userland */
2242                         param_ptr = (caddr_t) uap->arg;
2243                         param = (uint32_t) param_ptr;
2244                 }
2245                 else {
2246                         /* If no argument is specified, error out */
2247                         error = EINVAL;
2248                         goto out;
2249                 }
2250
2251                 /*
2252                  * Validate the different types of flags that can be specified:
2253                  * all of them are mutually exclusive for now.
2254                  */
2255                 switch (param) {
2256                         case F_IOTYPE_ISOCHRONOUS:
2257                                 break;
2258
2259                         default:
2260                                 error = EINVAL;
2261                                 goto out;
2262                 }
2263
2264
2265                 if (fp->f_type != DTYPE_VNODE) {
2266                         error = EBADF;
2267                         goto out;
2268                 }
2269                 vp = (struct vnode *)fp->f_data;
2270                 proc_fdunlock(p);
2271
2272                 error = vnode_getwithref(vp);
2273                 if (error) {
2274                         error = ENOENT;
2275                         goto outdrop;
2276                 }
2277
2278                 /* Only go forward if you have write access */
2279                 vfs_context_t ctx = vfs_context_current();
2280                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2281                         vnode_put(vp);
2282                         error = EBADF;
2283                         goto outdrop;
2284                 }
2285
2286                 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2287                 (void)vnode_put(vp);
2288
2289                 break;
2290         }
2291
2292
2293         /*
2294          * Extract the CodeDirectory of the vnode associated with
2295          * the file descriptor and copy it back to user space
2296          */
2297         case F_GETCODEDIR: {
2298                 struct user_fcodeblobs args;
2299
2300                 if (fp->f_type != DTYPE_VNODE) {
2301                         error = EBADF;
2302                         goto out;
2303                 }
2304
2305                 vp = (struct vnode *)fp->f_data;
2306                 proc_fdunlock(p);
2307
2308                 if ((fp->f_flag & FREAD) == 0) {
2309                         error = EBADF;
2310                         goto outdrop;
2311                 }
2312
2313                 if (IS_64BIT_PROCESS(p)) {
2314                         struct user64_fcodeblobs args64;
2315
2316                         error = copyin(argp, &args64, sizeof(args64));
2317                         if (error)
2318                                 goto outdrop;
2319
2320                         args.f_cd_hash = args64.f_cd_hash;
2321                         args.f_hash_size = args64.f_hash_size;
2322                         args.f_cd_buffer = args64.f_cd_buffer;
2323                         args.f_cd_size = args64.f_cd_size;
2324                         args.f_out_size = args64.f_out_size;
2325                         args.f_arch = args64.f_arch;
2326                 } else {
2327                         struct user32_fcodeblobs args32;
2328
2329                         error = copyin(argp, &args32, sizeof(args32));
2330                         if (error)
2331                                 goto outdrop;
2332
2333                         args.f_cd_hash = CAST_USER_ADDR_T(args32.f_cd_hash);
2334                         args.f_hash_size = args32.f_hash_size;
2335                         args.f_cd_buffer = CAST_USER_ADDR_T(args32.f_cd_buffer);
2336                         args.f_cd_size = args32.f_cd_size;
2337                         args.f_out_size = CAST_USER_ADDR_T(args32.f_out_size);
2338                         args.f_arch = args32.f_arch;
2339                 }
2340
2341                 if (vp->v_ubcinfo == NULL) {
2342                         error = EINVAL;
2343                         goto outdrop;
2344                 }
2345
2346                 struct cs_blob *t_blob = vp->v_ubcinfo->cs_blobs;
2347
2348                 /*
2349                  * This call fails if there is no cs_blob corresponding to the
2350                  * vnode, or if there are multiple cs_blobs present, and the caller
2351                  * did not specify which cpu_type they want the cs_blob for
2352                  */
2353                 if (t_blob == NULL) {
2354                         error = ENOENT; /* there is no codesigning blob for this process */
2355                         goto outdrop;
2356                 } else if (args.f_arch == 0 && t_blob->csb_next != NULL) {
2357                         error = ENOENT; /* too many architectures and none specified */
2358                         goto outdrop;
2359                 }
2360
2361                 /* If the user specified an architecture, find the right blob */
2362                 if (args.f_arch != 0) {
2363                         while (t_blob) {
2364                                 if (t_blob->csb_cpu_type == args.f_arch)
2365                                         break;
2366                                 t_blob = t_blob->csb_next;
2367                         }
2368                         /* The cpu_type the user requested could not be found */
2369                         if (t_blob == NULL) {
2370                                 error = ENOENT;
2371                                 goto outdrop;
2372                         }
2373                 }
2374
2375                 const CS_SuperBlob *super_blob = (void *)t_blob->csb_mem_kaddr;
2376                 const CS_CodeDirectory *cd = findCodeDirectory(super_blob,
2377                                                          (const char *) super_blob,
2378                                                          (const char *) super_blob + t_blob->csb_mem_size);
2379                 if (cd == NULL) {
2380                         error = ENOENT;
2381                         goto outdrop;
2382                 }
2383
2384                 uint64_t buffer_size = ntohl(cd->length);
2385
2386                 if (buffer_size > UINT_MAX) {
2387                         error = ERANGE;
2388                         goto outdrop;
2389                 }
2390
2391                 error = copyout(&buffer_size, args.f_out_size, sizeof(unsigned int));
2392                 if (error)
2393                         goto outdrop;
2394
2395                 if (sizeof(t_blob->csb_cdhash) > args.f_hash_size ||
2396                                         buffer_size > args.f_cd_size) {
2397                         error = ERANGE;
2398                         goto outdrop;
2399                 }
2400
2401                 error = copyout(t_blob->csb_cdhash, args.f_cd_hash, sizeof(t_blob->csb_cdhash));
2402                 if (error)
2403                         goto outdrop;
2404                 error = copyout(cd, args.f_cd_buffer, buffer_size);
2405                 if (error)
2406                         goto outdrop;
2407
2408                 break;
2409         }
2410
2411         /*
2412          * Set the vnode pointed to by 'fd'
2413          * and tag it as the (potentially future) backing store
2414          * for another filesystem
2415          */
2416         case F_SETBACKINGSTORE: {
2417                 if (fp->f_type != DTYPE_VNODE) {
2418                         error = EBADF;
2419                         goto out;
2420                 }
2421
2422                 vp = (struct vnode *)fp->f_data;
2423
2424                 if (vp->v_tag != VT_HFS) {
2425                         error = EINVAL;
2426                         goto out;
2427                 }
2428                 proc_fdunlock(p);
2429
2430                 if (vnode_getwithref(vp)) {
2431                         error = ENOENT;
2432                         goto outdrop;
2433                 }
2434
2435                 /* only proceed if you have write access */
2436                 vfs_context_t ctx = vfs_context_current();
2437                 if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2438                         vnode_put(vp);
2439                         error = EBADF;
2440                         goto outdrop;
2441                 }
2442
2443
2444                 /* If arg != 0, set, otherwise unset */
2445                 if (uap->arg) {
2446                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)1, 0, &context);
2447                 }
2448                 else {
2449                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t)NULL, 0, &context);
2450                 }
2451
2452                 vnode_put(vp);
2453                 break;
2454         }
2455
2456         /*
2457          * like F_GETPATH, but special semantics for
2458          * the mobile time machine handler.
2459          */
2460         case F_GETPATH_MTMINFO: {
2461                 char *pathbufp;
2462                 int pathlen;
2463
2464                 if (fp->f_type != DTYPE_VNODE) {
2465                         error = EBADF;
2466                         goto out;
2467                 }
2468                 vp = (struct vnode *)fp->f_data;
2469                 proc_fdunlock(p);
2470
2471                 pathlen = MAXPATHLEN;
2472                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
2473                 if (pathbufp == NULL) {
2474                         error = ENOMEM;
2475                         goto outdrop;
2476                 }
2477                 if ( (error = vnode_getwithref(vp)) == 0 ) {
2478                         int backingstore = 0;
2479
2480                         /* Check for error from vn_getpath before moving on */
2481                         if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
2482                                 if (vp->v_tag == VT_HFS) {
2483                                         error = VNOP_IOCTL (vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2484                                 }
2485                                 (void)vnode_put(vp);
2486
2487                                 if (error == 0) {
2488                                         error = copyout((caddr_t)pathbufp, argp, pathlen);
2489                                 }
2490                                 if (error == 0) {
2491                                         /*
2492                                          * If the copyout was successful, now check to ensure
2493                                          * that this vnode is not a BACKINGSTORE vnode.  mtmd
2494                                          * wants the path regardless.
2495                                          */
2496                                         if (backingstore) {
2497                                                 error = EBUSY;
2498                                         }
2499                                 }
2500                         } else
2501                                 (void)vnode_put(vp);
2502                 }
2503                 FREE(pathbufp, M_TEMP);
2504                 goto outdrop;
2505         }
2506
2507 #if DEBUG || DEVELOPMENT
2508         case F_RECYCLE:
2509                 if (fp->f_type != DTYPE_VNODE) {
2510                         error = EBADF;
2511                         goto out;
2512                 }
2513                 vp = (struct vnode *)fp->f_data;
2514                 proc_fdunlock(p);
2515
2516                 vnode_recycle(vp);
2517                 break;
2518 #endif
2519
2520         default:
2521                 /*
2522                  * This is an fcntl() that we do not recognize at this level;
2523                  * if this is a vnode, we send it down into the VNOP_IOCTL
2524                  * for this vnode; this can include special devices, and will
2525                  * effectively overload fcntl() to send ioctl()'s.
2526                  */
2527                 if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){
2528             error = EINVAL;
2529                         goto out;
2530                 }
2531
2532                 /* Catch any now-invalid fcntl() selectors */
2533                 switch (uap->cmd) {
2534                         case F_MARKDEPENDENCY:
2535                                 error = EINVAL;
2536                                 goto out;
2537                         default:
2538                                 break;
2539                 }
2540
2541                 if (fp->f_type != DTYPE_VNODE) {
2542                         error = EBADF;
2543                         goto out;
2544                 }
2545                 vp = (struct vnode *)fp->f_data;
2546                 proc_fdunlock(p);
2547
2548                 if ( (error = vnode_getwithref(vp)) == 0 ) {
2549 #define STK_PARAMS 128
2550                         char stkbuf[STK_PARAMS];
2551                         unsigned int size;
2552                         caddr_t data, memp;
2553                         /*
2554                          * For this to work properly, we have to copy in the
2555                          * ioctl() cmd argument if there is one; we must also
2556                          * check that a command parameter, if present, does
2557                          * not exceed the maximum command length dictated by
2558                          * the number of bits we have available in the command
2559                          * to represent a structure length.  Finally, we have
2560                          * to copy the results back out, if it is that type of
2561                          * ioctl().
2562                          */
2563                         size = IOCPARM_LEN(uap->cmd);
2564                         if (size > IOCPARM_MAX) {
2565                                 (void)vnode_put(vp);
2566                                 error = EINVAL;
2567                                 break;
2568                         }
2569
2570                         memp = NULL;
2571                         if (size > sizeof (stkbuf)) {
2572                                 if ((memp = (caddr_t)kalloc(size)) == 0) {
2573                                         (void)vnode_put(vp);
2574                                         error = ENOMEM;
2575                                         goto outdrop;
2576                                 }
2577                                 data = memp;
2578                         } else {
2579                                 data = &stkbuf[0];
2580                         }
2581
2582                         if (uap->cmd & IOC_IN) {
2583                                 if (size) {
2584                                         /* structure */
2585                                         error = copyin(argp, data, size);
2586                                         if (error) {
2587                                                 (void)vnode_put(vp);
2588                                                 if (memp)
2589                                                         kfree(memp, size);
2590                                                 goto outdrop;
2591                                         }
2592
2593                                         /* Bzero the section beyond that which was needed */
2594                                         if (size <= sizeof(stkbuf)) {
2595                                                 bzero ( (((uint8_t*)data) + size), (sizeof(stkbuf) - size));
2596                                         }
2597                                 } else {
2598                                         /* int */
2599                                         if (is64bit) {
2600                                                 *(user_addr_t *)data = argp;
2601                                         } else {
2602                                                 *(uint32_t *)data = (uint32_t)argp;
2603                                         }
2604                                 };
2605                         } else if ((uap->cmd & IOC_OUT) && size) {
2606                                 /*
2607                                  * Zero the buffer so the user always
2608                                  * gets back something deterministic.
2609                                  */
2610                                 bzero(data, size);
2611                         } else if (uap->cmd & IOC_VOID) {
2612                                 if (is64bit) {
2613                                     *(user_addr_t *)data = argp;
2614                                 } else {
2615                                     *(uint32_t *)data = (uint32_t)argp;
2616                                 }
2617                         }
2618
2619                         error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2620
2621                         (void)vnode_put(vp);
2622
2623                         /* Copy any output data to user */
2624                         if (error == 0 && (uap->cmd & IOC_OUT) && size)
2625                                 error = copyout(data, argp, size);
2626                         if (memp)
2627                                 kfree(memp, size);
2628                 }
2629                 break;
2630         }
2631
2632 outdrop:
2633         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2634         fp_drop(p, fd, fp, 0);
2635         return(error);
2636 out:
2637         fp_drop(p, fd, fp, 1);
2638         proc_fdunlock(p);
2639         return(error);
2640 }
2641
2642
2643 /*
2644  * finishdup
2645  *
2646  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2647  *
2648  * Parameters:  p                               Process performing the dup
2649  *              old                             The fd to dup
2650  *              new                             The fd to dup it to
2651  *              fd_flags                        Flags to augment the new fd
2652  *              retval                          Pointer to the call return area
2653  *
2654  * Returns:     0                               Success
2655  *              EBADF
2656  *              ENOMEM
2657  *
2658  * Implicit returns:
2659  *              *retval (modified)              The new descriptor
2660  *
2661  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
2662  *              the caller
2663  *
2664  * Notes:       This function may drop and reacquire this lock; it is unsafe
2665  *              for a caller to assume that other state protected by the lock
2666  *              has not been subsequently changed out from under it.
2667  */
2668 int
2669 finishdup(proc_t p,
2670     struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2671 {
2672         struct fileproc *nfp;
2673         struct fileproc *ofp;
2674 #if CONFIG_MACF
2675         int error;
2676 #endif
2677
2678 #if DIAGNOSTIC
2679         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2680 #endif
2681         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2682             (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2683                 fdrelse(p, new);
2684                 return (EBADF);
2685         }
2686         fg_ref(ofp);
2687
2688 #if CONFIG_MACF
2689         error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2690         if (error) {
2691                 fg_drop(ofp);
2692                 fdrelse(p, new);
2693                 return (error);
2694         }
2695 #endif
2696
2697         proc_fdunlock(p);
2698
2699         nfp = fileproc_alloc_init(NULL);
2700
2701         proc_fdlock(p);
2702
2703         if (nfp == NULL) {
2704                 fg_drop(ofp);
2705                 fdrelse(p, new);
2706                 return (ENOMEM);
2707         }
2708
2709         nfp->f_fglob = ofp->f_fglob;
2710
2711 #if DIAGNOSTIC
2712         if (fdp->fd_ofiles[new] != 0)
2713                 panic("finishdup: overwriting fd_ofiles with new %d", new);
2714         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
2715                 panic("finishdup: unreserved fileflags with new %d", new);
2716 #endif
2717
2718         if (new > fdp->fd_lastfile)
2719                 fdp->fd_lastfile = new;
2720         *fdflags(p, new) |= fd_flags;
2721         procfdtbl_releasefd(p, new, nfp);
2722         *retval = new;
2723         return (0);
2724 }
2725
2726
2727 /*
2728  * close
2729  *
2730  * Description: The implementation of the close(2) system call
2731  *
2732  * Parameters:  p                       Process in whose per process file table
2733  *                                      the close is to occur
2734  *              uap->fd                 fd to be closed
2735  *              retval                  <unused>
2736  *
2737  * Returns:     0                       Success
2738  *      fp_lookup:EBADF                 Bad file descriptor
2739  *      fp_guard_exception:???          Guarded file descriptor
2740  *      close_internal:EBADF
2741  *      close_internal:???              Anything returnable by a per-fileops
2742  *                                      close function
2743  */
2744 int
2745 close(proc_t p, struct close_args *uap, int32_t *retval)
2746 {
2747         __pthread_testcancel(1);
2748         return(close_nocancel(p, (struct close_nocancel_args *)uap, retval));
2749 }
2750
2751
2752 int
2753 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2754 {
2755         struct fileproc *fp;
2756         int fd = uap->fd;
2757         int error;
2758
2759         AUDIT_SYSCLOSE(p, fd);
2760
2761         proc_fdlock(p);
2762
2763         if ( (error = fp_lookup(p,fd,&fp, 1)) ) {
2764                 proc_fdunlock(p);
2765                 return(error);
2766         }
2767
2768         if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
2769                 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
2770                 (void) fp_drop(p, fd, fp, 1);
2771                 proc_fdunlock(p);
2772                 return (error);
2773         }
2774
2775         error = close_internal_locked(p, fd, fp, 0);
2776
2777         proc_fdunlock(p);
2778
2779         return (error);
2780 }
2781
2782
2783 /*
2784  * close_internal_locked
2785  *
2786  * Close a file descriptor.
2787  *
2788  * Parameters:  p                       Process in whose per process file table
2789  *                                      the close is to occur
2790  *              fd                      fd to be closed
2791  *              fp                      fileproc associated with the fd
2792  *
2793  * Returns:     0                       Success
2794  *              EBADF                   fd already in close wait state
2795  *      closef_locked:???               Anything returnable by a per-fileops
2796  *                                      close function
2797  *
2798  * Locks:       Assumes proc_fdlock for process is held by the caller and returns
2799  *              with lock held
2800  *
2801  * Notes:       This function may drop and reacquire this lock; it is unsafe
2802  *              for a caller to assume that other state protected by the lock
2803  *              has not been subsequently changed out from under it.
2804  */
2805 int
2806 close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
2807 {
2808         struct filedesc *fdp = p->p_fd;
2809         int error =0;
2810         int resvfd = flags & FD_DUP2RESV;
2811
2812
2813 #if DIAGNOSTIC
2814         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2815 #endif
2816
2817         /* Keep people from using the filedesc while we are closing it */
2818         procfdtbl_markclosefd(p, fd);
2819
2820
2821         if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2822                 panic("close_internal_locked: being called on already closing fd");
2823         }
2824
2825
2826 #if DIAGNOSTIC
2827         if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2828                 panic("close_internal: unreserved fileflags with fd %d", fd);
2829 #endif
2830
2831         fp->f_flags |= FP_CLOSING;
2832
2833         if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) {
2834
2835                 proc_fdunlock(p);
2836
2837                 if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) {
2838                         /*
2839                          * call out to allow 3rd party notification of close.
2840                          * Ignore result of kauth_authorize_fileop call.
2841                          */
2842                         if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
2843                                 u_int   fileop_flags = 0;
2844                                 if ((fp->f_flags & FP_WRITTEN) != 0)
2845                                         fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
2846                                 kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2847                                                        (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2848                                 vnode_put((vnode_t)fp->f_data);
2849                         }
2850                 }
2851                 if (fp->f_flags & FP_AIOISSUED)
2852                         /*
2853                          * cancel all async IO requests that can be cancelled.
2854                          */
2855                         _aio_close( p, fd );
2856
2857                 proc_fdlock(p);
2858         }
2859
2860         if (fd < fdp->fd_knlistsize)
2861                 knote_fdclose(p, fd);
2862
2863         if (fp->f_flags & FP_WAITEVENT)
2864                 (void)waitevent_close(p, fp);
2865
2866         fileproc_drain(p, fp);
2867
2868         if (resvfd == 0) {
2869                 _fdrelse(p, fd);
2870         } else {
2871                 procfdtbl_reservefd(p, fd);
2872         }
2873
2874         if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET)
2875                 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
2876                     fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
2877
2878         error = closef_locked(fp, fp->f_fglob, p);
2879         if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE)
2880                 wakeup(&fp->f_flags);
2881         fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
2882
2883         proc_fdunlock(p);
2884
2885         fileproc_free(fp);
2886
2887         proc_fdlock(p);
2888
2889 #if DIAGNOSTIC
2890         if (resvfd != 0) {
2891                 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
2892                         panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
2893         }
2894 #endif
2895
2896         return(error);
2897 }
2898
2899
2900 /*
2901  * fstat1
2902  *
2903  * Description: Return status information about a file descriptor.
2904  *
2905  * Parameters:  p                               The process doing the fstat
2906  *              fd                              The fd to stat
2907  *              ub                              The user stat buffer
2908  *              xsecurity                       The user extended security
2909  *                                              buffer, or 0 if none
2910  *              xsecurity_size                  The size of xsecurity, or 0
2911  *                                              if no xsecurity
2912  *              isstat64                        Flag to indicate 64 bit version
2913  *                                              for inode size, etc.
2914  *
2915  * Returns:     0                               Success
2916  *              EBADF
2917  *              EFAULT
2918  *      fp_lookup:EBADF                         Bad file descriptor
2919  *      vnode_getwithref:???
2920  *      copyout:EFAULT
2921  *      vnode_getwithref:???
2922  *      vn_stat:???
2923  *      soo_stat:???
2924  *      pipe_stat:???
2925  *      pshm_stat:???
2926  *      kqueue_stat:???
2927  *
2928  * Notes:       Internal implementation for all other fstat() related
2929  *              functions
2930  *
2931  *              XXX switch on node type is bogus; need a stat in struct
2932  *              XXX fileops instead.
2933  */
2934 static int
2935 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
2936 {
2937         struct fileproc *fp;
2938         union {
2939                 struct stat sb;
2940                 struct stat64 sb64;
2941         } source;
2942         union {
2943                 struct user64_stat user64_sb;
2944                 struct user32_stat user32_sb;
2945                 struct user64_stat64 user64_sb64;
2946                 struct user32_stat64 user32_sb64;
2947         } dest;
2948         int error, my_size;
2949         file_type_t type;
2950         caddr_t data;
2951         kauth_filesec_t fsec;
2952         user_size_t xsecurity_bufsize;
2953         vfs_context_t ctx = vfs_context_current();
2954         void * sbptr;
2955
2956
2957         AUDIT_ARG(fd, fd);
2958
2959         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
2960                 return(error);
2961         }
2962         type = fp->f_type;
2963         data = fp->f_data;
2964         fsec = KAUTH_FILESEC_NONE;
2965
2966         sbptr = (void *)&source;
2967
2968         switch (type) {
2969
2970         case DTYPE_VNODE:
2971                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
2972                         /*
2973                          * If the caller has the file open, and is not
2974                          * requesting extended security information, we are
2975                          * going to let them get the basic stat information.
2976                          */
2977                         if (xsecurity == USER_ADDR_NULL) {
2978                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx);
2979                         } else {
2980                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx);
2981                         }
2982
2983                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
2984                         (void)vnode_put((vnode_t)data);
2985                 }
2986                 break;
2987
2988 #if SOCKETS
2989         case DTYPE_SOCKET:
2990                 error = soo_stat((struct socket *)data, sbptr, isstat64);
2991                 break;
2992 #endif /* SOCKETS */
2993
2994         case DTYPE_PIPE:
2995                 error = pipe_stat((void *)data, sbptr, isstat64);
2996                 break;
2997
2998         case DTYPE_PSXSHM:
2999                 error = pshm_stat((void *)data, sbptr, isstat64);
3000                 break;
3001
3002         case DTYPE_KQUEUE:
3003                 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3004                 break;
3005
3006         default:
3007                 error = EBADF;
3008                 goto out;
3009         }
3010         if (error == 0) {
3011                 caddr_t sbp;
3012
3013                 if (isstat64 != 0) {
3014                         source.sb64.st_lspare = 0;
3015                         source.sb64.st_qspare[0] = 0LL;
3016                         source.sb64.st_qspare[1] = 0LL;
3017
3018                         if (IS_64BIT_PROCESS(current_proc())) {
3019                                 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3020                                 my_size = sizeof(dest.user64_sb64);
3021                                 sbp = (caddr_t)&dest.user64_sb64;
3022                         } else {
3023                                 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3024                                 my_size = sizeof(dest.user32_sb64);
3025                                 sbp = (caddr_t)&dest.user32_sb64;
3026                         }
3027                 } else {
3028                         source.sb.st_lspare = 0;
3029                         source.sb.st_qspare[0] = 0LL;
3030                         source.sb.st_qspare[1] = 0LL;
3031                         if (IS_64BIT_PROCESS(current_proc())) {
3032                                 munge_user64_stat(&source.sb, &dest.user64_sb);
3033                                 my_size = sizeof(dest.user64_sb);
3034                                 sbp = (caddr_t)&dest.user64_sb;
3035                         } else {
3036                                 munge_user32_stat(&source.sb, &dest.user32_sb);
3037                                 my_size = sizeof(dest.user32_sb);
3038                                 sbp = (caddr_t)&dest.user32_sb;
3039                         }
3040                 }
3041
3042                 error = copyout(sbp, ub, my_size);
3043         }
3044
3045         /* caller wants extended security information? */
3046         if (xsecurity != USER_ADDR_NULL) {
3047
3048                 /* did we get any? */
3049                  if (fsec == KAUTH_FILESEC_NONE) {
3050                         if (susize(xsecurity_size, 0) != 0) {
3051                                 error = EFAULT;
3052                                 goto out;
3053                         }
3054                 } else {
3055                         /* find the user buffer size */
3056                         xsecurity_bufsize = fusize(xsecurity_size);
3057
3058                         /* copy out the actual data size */
3059                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3060                                 error = EFAULT;
3061                                 goto out;
3062                         }
3063
3064                         /* if the caller supplied enough room, copy out to it */
3065                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3066                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3067                 }
3068         }
3069 out:
3070         fp_drop(p, fd, fp, 0);
3071         if (fsec != NULL)
3072                 kauth_filesec_free(fsec);
3073         return (error);
3074 }
3075
3076
3077 /*
3078  * fstat_extended
3079  *
3080  * Description: Extended version of fstat supporting returning extended
3081  *              security information
3082  *
3083  * Parameters:  p                               The process doing the fstat
3084  *              uap->fd                         The fd to stat
3085  *              uap->ub                         The user stat buffer
3086  *              uap->xsecurity                  The user extended security
3087  *                                              buffer, or 0 if none
3088  *              uap->xsecurity_size             The size of xsecurity, or 0
3089  *
3090  * Returns:     0                               Success
3091  *              !0                              Errno (see fstat1)
3092  */
3093 int
3094 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3095 {
3096         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3097 }
3098
3099
3100 /*
3101  * fstat
3102  *
3103  * Description: Get file status for the file associated with fd
3104  *
3105  * Parameters:  p                               The process doing the fstat
3106  *              uap->fd                         The fd to stat
3107  *              uap->ub                         The user stat buffer
3108  *
3109  * Returns:     0                               Success
3110  *              !0                              Errno (see fstat1)
3111  */
3112 int
3113 fstat(proc_t p, register struct fstat_args *uap, __unused int32_t *retval)
3114 {
3115         return(fstat1(p, uap->fd, uap->ub, 0, 0, 0));
3116 }
3117
3118
3119 /*
3120  * fstat64_extended
3121  *
3122  * Description: Extended version of fstat64 supporting returning extended
3123  *              security information
3124  *
3125  * Parameters:  p                               The process doing the fstat
3126  *              uap->fd                         The fd to stat
3127  *              uap->ub                         The user stat buffer
3128  *              uap->xsecurity                  The user extended security
3129  *                                              buffer, or 0 if none
3130  *              uap->xsecurity_size             The size of xsecurity, or 0
3131  *
3132  * Returns:     0                               Success
3133  *              !0                              Errno (see fstat1)
3134  */
3135 int
3136 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3137 {
3138         return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3139 }
3140
3141
3142 /*
3143  * fstat64
3144  *
3145  * Description: Get 64 bit version of the file status for the file associated
3146  *              with fd
3147  *
3148  * Parameters:  p                               The process doing the fstat
3149  *              uap->fd                         The fd to stat
3150  *              uap->ub                         The user stat buffer
3151  *
3152  * Returns:     0                               Success
3153  *              !0                              Errno (see fstat1)
3154  */
3155 int
3156 fstat64(proc_t p, register struct fstat64_args *uap, __unused int32_t *retval)
3157 {
3158         return(fstat1(p, uap->fd, uap->ub, 0, 0, 1));
3159 }
3160
3161
3162 /*
3163  * fpathconf
3164  *
3165  * Description: Return pathconf information about a file descriptor.
3166  *
3167  * Parameters:  p                               Process making the request
3168  *              uap->fd                         fd to get information about
3169  *              uap->name                       Name of information desired
3170  *              retval                          Pointer to the call return area
3171  *
3172  * Returns:     0                               Success
3173  *              EINVAL
3174  *      fp_lookup:EBADF                         Bad file descriptor
3175  *      vnode_getwithref:???
3176  *      vn_pathconf:???
3177  *
3178  * Implicit returns:
3179  *              *retval (modified)              Returned information (numeric)
3180  */
3181 int
3182 fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3183 {
3184         int fd = uap->fd;
3185         struct fileproc *fp;
3186         struct vnode *vp;
3187         int error = 0;
3188         file_type_t type;
3189         caddr_t data;
3190
3191
3192         AUDIT_ARG(fd, uap->fd);
3193         if ( (error = fp_lookup(p, fd, &fp, 0)) )
3194                 return(error);
3195         type = fp->f_type;
3196         data = fp->f_data;
3197
3198         switch (type) {
3199
3200         case DTYPE_SOCKET:
3201                 if (uap->name != _PC_PIPE_BUF) {
3202                         error = EINVAL;
3203                         goto out;
3204                 }
3205                 *retval = PIPE_BUF;
3206                 error = 0;
3207                 goto out;
3208
3209         case DTYPE_PIPE:
3210                 if (uap->name != _PC_PIPE_BUF) {
3211                         error = EINVAL;
3212                         goto out;
3213                 }
3214                 *retval = PIPE_BUF;
3215                 error = 0;
3216                 goto out;
3217
3218         case DTYPE_VNODE:
3219                 vp = (struct vnode *)data;
3220
3221                 if ( (error = vnode_getwithref(vp)) == 0) {
3222                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3223
3224                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3225
3226                         (void)vnode_put(vp);
3227                 }
3228                 goto out;
3229
3230         default:
3231                 error = EINVAL;
3232                 goto out;
3233
3234         }
3235         /*NOTREACHED*/
3236 out:
3237         fp_drop(p, fd, fp, 0);
3238         return(error);
3239 }
3240
3241 /*
3242  * Statistics counter for the number of times a process calling fdalloc()
3243  * has resulted in an expansion of the per process open file table.
3244  *
3245  * XXX This would likely be of more use if it were per process
3246  */
3247 int fdexpand;
3248
3249
3250 /*
3251  * fdalloc
3252  *
3253  * Description: Allocate a file descriptor for the process.
3254  *
3255  * Parameters:  p                               Process to allocate the fd in
3256  *              want                            The fd we would prefer to get
3257  *              result                          Pointer to fd we got
3258  *
3259  * Returns:     0                               Success
3260  *              EMFILE
3261  *              ENOMEM
3262  *
3263  * Implicit returns:
3264  *              *result (modified)              The fd which was allocated
3265  */
3266 int
3267 fdalloc(proc_t p, int want, int *result)
3268 {
3269         struct filedesc *fdp = p->p_fd;
3270         int i;
3271         int lim, last, numfiles, oldnfiles;
3272         struct fileproc **newofiles, **ofiles;
3273         char *newofileflags;
3274
3275         /*
3276          * Search for a free descriptor starting at the higher
3277          * of want or fd_freefile.  If that fails, consider
3278          * expanding the ofile array.
3279          */
3280 #if DIAGNOSTIC
3281         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3282 #endif
3283
3284         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3285         for (;;) {
3286                 last = min(fdp->fd_nfiles, lim);
3287                 if ((i = want) < fdp->fd_freefile)
3288                         i = fdp->fd_freefile;
3289                 for (; i < last; i++) {
3290                         if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3291                                 procfdtbl_reservefd(p, i);
3292                                 if (i > fdp->fd_lastfile)
3293                                         fdp->fd_lastfile = i;
3294                                 if (want <= fdp->fd_freefile)
3295                                         fdp->fd_freefile = i;
3296                                 *result = i;
3297                                 return (0);
3298                         }
3299                 }
3300
3301                 /*
3302                  * No space in current array.  Expand?
3303                  */
3304                 if (fdp->fd_nfiles >= lim)
3305                         return (EMFILE);
3306                 if (fdp->fd_nfiles < NDEXTENT)
3307                         numfiles = NDEXTENT;
3308                 else
3309                         numfiles = 2 * fdp->fd_nfiles;
3310                 /* Enforce lim */
3311                 if (numfiles > lim)
3312                         numfiles = lim;
3313                 proc_fdunlock(p);
3314                 MALLOC_ZONE(newofiles, struct fileproc **,
3315                                 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3316                 proc_fdlock(p);
3317                 if (newofiles == NULL) {
3318                         return (ENOMEM);
3319                 }
3320                 if (fdp->fd_nfiles >= numfiles) {
3321                         FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
3322                         continue;
3323                 }
3324                 newofileflags = (char *) &newofiles[numfiles];
3325                 /*
3326                  * Copy the existing ofile and ofileflags arrays
3327                  * and zero the new portion of each array.
3328                  */
3329                 oldnfiles = fdp->fd_nfiles;
3330                 (void) memcpy(newofiles, fdp->fd_ofiles,
3331                                 oldnfiles * sizeof(*fdp->fd_ofiles));
3332                 (void) memset(&newofiles[oldnfiles], 0,
3333                                 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3334
3335                 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3336                                 oldnfiles * sizeof(*fdp->fd_ofileflags));
3337                 (void) memset(&newofileflags[oldnfiles], 0,
3338                                 (numfiles - oldnfiles) *
3339                                                 sizeof(*fdp->fd_ofileflags));
3340                 ofiles = fdp->fd_ofiles;
3341                 fdp->fd_ofiles = newofiles;
3342                 fdp->fd_ofileflags = newofileflags;
3343                 fdp->fd_nfiles = numfiles;
3344                 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
3345                 fdexpand++;
3346         }
3347 }
3348
3349
3350 /*
3351  * fdavail
3352  *
3353  * Description: Check to see whether n user file descriptors are available
3354  *              to the process p.
3355  *
3356  * Parameters:  p                               Process to check in
3357  *              n                               The number of fd's desired
3358  *
3359  * Returns:     0                               No
3360  *              1                               Yes
3361  *
3362  * Locks:       Assumes proc_fdlock for process is held by the caller
3363  *
3364  * Notes:       The answer only remains valid so long as the proc_fdlock is
3365  *              held by the caller.
3366  */
3367 int
3368 fdavail(proc_t p, int n)
3369 {
3370         struct filedesc *fdp = p->p_fd;
3371         struct fileproc **fpp;
3372         char *flags;
3373         int i, lim;
3374
3375         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3376         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
3377                 return (1);
3378         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3379         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3380         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
3381                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
3382                         return (1);
3383         return (0);
3384 }
3385
3386
3387 /*
3388  * fdrelse
3389  *
3390  * Description: Legacy KPI wrapper function for _fdrelse
3391  *
3392  * Parameters:  p                               Process in which fd lives
3393  *              fd                              fd to free
3394  *
3395  * Returns:     void
3396  *
3397  * Locks:       Assumes proc_fdlock for process is held by the caller
3398  */
3399 void
3400 fdrelse(proc_t p, int fd)
3401 {
3402         _fdrelse(p, fd);
3403 }
3404
3405
3406 /*
3407  * fdgetf_noref
3408  *
3409  * Description: Get the fileproc pointer for the given fd from the per process
3410  *              open file table without taking an explicit reference on it.
3411  *
3412  * Parameters:  p                               Process containing fd
3413  *              fd                              fd to obtain fileproc for
3414  *              resultfp                        Pointer to pointer return area
3415  *
3416  * Returns:     0                               Success
3417  *              EBADF
3418  *
3419  * Implicit returns:
3420  *              *resultfp (modified)            Pointer to fileproc pointer
3421  *
3422  * Locks:       Assumes proc_fdlock for process is held by the caller
3423  *
3424  * Notes:       Because there is no reference explicitly taken, the returned
3425  *              fileproc pointer is only valid so long as the proc_fdlock
3426  *              remains held by the caller.
3427  */
3428 int
3429 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
3430 {
3431         struct filedesc *fdp = p->p_fd;
3432         struct fileproc *fp;
3433
3434         if (fd < 0 || fd >= fdp->fd_nfiles ||
3435                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3436                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3437                 return (EBADF);
3438         }
3439         if (resultfp)
3440                 *resultfp = fp;
3441         return (0);
3442 }
3443
3444
3445 /*
3446  * fp_getfvp
3447  *
3448  * Description: Get fileproc and vnode pointer for a given fd from the per
3449  *              process open file table of the specified process, and if
3450  *              successful, increment the f_iocount
3451  *
3452  * Parameters:  p                               Process in which fd lives
3453  *              fd                              fd to get information for
3454  *              resultfp                        Pointer to result fileproc
3455  *                                              pointer area, or 0 if none
3456  *              resultvp                        Pointer to result vnode pointer
3457  *                                              area, or 0 if none
3458  *
3459  * Returns:     0                               Success
3460  *              EBADF                           Bad file descriptor
3461  *              ENOTSUP                         fd does not refer to a vnode
3462  *
3463  * Implicit returns:
3464  *              *resultfp (modified)            Fileproc pointer
3465  *              *resultvp (modified)            vnode pointer
3466  *
3467  * Notes:       The resultfp and resultvp fields are optional, and may be
3468  *              independently specified as NULL to skip returning information
3469  *
3470  * Locks:       Internally takes and releases proc_fdlock
3471  */
3472 int
3473 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
3474 {
3475         struct filedesc *fdp = p->p_fd;
3476         struct fileproc *fp;
3477
3478         proc_fdlock_spin(p);
3479         if (fd < 0 || fd >= fdp->fd_nfiles ||
3480                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3481                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3482                 proc_fdunlock(p);
3483                 return (EBADF);
3484         }
3485         if (fp->f_type != DTYPE_VNODE) {
3486                 proc_fdunlock(p);
3487                 return(ENOTSUP);
3488         }
3489         fp->f_iocount++;
3490
3491         if (resultfp)
3492                 *resultfp = fp;
3493         if (resultvp)
3494                 *resultvp = (struct vnode *)fp->f_data;
3495         proc_fdunlock(p);
3496
3497         return (0);
3498 }
3499
3500
3501 /*
3502  * fp_getfvpandvid
3503  *
3504  * Description: Get fileproc, vnode pointer, and vid for a given fd from the
3505  *              per process open file table of the specified process, and if
3506  *              successful, increment the f_iocount
3507  *
3508  * Parameters:  p                               Process in which fd lives
3509  *              fd                              fd to get information for
3510  *              resultfp                        Pointer to result fileproc
3511  *                                              pointer area, or 0 if none
3512  *              resultvp                        Pointer to result vnode pointer
3513  *                                              area, or 0 if none
3514  *              vidp                            Pointer to resuld vid area
3515  *
3516  * Returns:     0                               Success
3517  *              EBADF                           Bad file descriptor
3518  *              ENOTSUP                         fd does not refer to a vnode
3519  *
3520  * Implicit returns:
3521  *              *resultfp (modified)            Fileproc pointer
3522  *              *resultvp (modified)            vnode pointer
3523  *              *vidp                           vid value
3524  *
3525  * Notes:       The resultfp and resultvp fields are optional, and may be
3526  *              independently specified as NULL to skip returning information
3527  *
3528  * Locks:       Internally takes and releases proc_fdlock
3529  */
3530 int
3531 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3532                 struct vnode **resultvp, uint32_t *vidp)
3533 {
3534         struct filedesc *fdp = p->p_fd;
3535         struct fileproc *fp;
3536
3537         proc_fdlock_spin(p);
3538         if (fd < 0 || fd >= fdp->fd_nfiles ||
3539                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3540                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3541                 proc_fdunlock(p);
3542                 return (EBADF);
3543         }
3544         if (fp->f_type != DTYPE_VNODE) {
3545                 proc_fdunlock(p);
3546                 return(ENOTSUP);
3547         }
3548         fp->f_iocount++;
3549
3550         if (resultfp)
3551                 *resultfp = fp;
3552         if (resultvp)
3553                 *resultvp = (struct vnode *)fp->f_data;
3554         if (vidp)
3555                 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3556         proc_fdunlock(p);
3557
3558         return (0);
3559 }
3560
3561
3562 /*
3563  * fp_getfsock
3564  *
3565  * Description: Get fileproc and socket pointer for a given fd from the
3566  *              per process open file table of the specified process, and if
3567  *              successful, increment the f_iocount
3568  *
3569  * Parameters:  p                               Process in which fd lives
3570  *              fd                              fd to get information for
3571  *              resultfp                        Pointer to result fileproc
3572  *                                              pointer area, or 0 if none
3573  *              results                         Pointer to result socket
3574  *                                              pointer area, or 0 if none
3575  *
3576  * Returns:     EBADF                   The file descriptor is invalid
3577  *              EOPNOTSUPP              The file descriptor is not a socket
3578  *              0                       Success
3579  *
3580  * Implicit returns:
3581  *              *resultfp (modified)            Fileproc pointer
3582  *              *results (modified)             socket pointer
3583  *
3584  * Notes:       EOPNOTSUPP should probably be ENOTSOCK; this function is only
3585  *              ever called from accept1().
3586  */
3587 int
3588 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3589             struct socket **results)
3590 {
3591         struct filedesc *fdp = p->p_fd;
3592         struct fileproc *fp;
3593
3594         proc_fdlock_spin(p);
3595         if (fd < 0 || fd >= fdp->fd_nfiles ||
3596                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3597                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3598                 proc_fdunlock(p);
3599                 return (EBADF);
3600         }
3601         if (fp->f_type != DTYPE_SOCKET) {
3602                 proc_fdunlock(p);
3603                 return(EOPNOTSUPP);
3604         }
3605         fp->f_iocount++;
3606
3607         if (resultfp)
3608                 *resultfp = fp;
3609         if (results)
3610                 *results = (struct socket *)fp->f_data;
3611         proc_fdunlock(p);
3612
3613         return (0);
3614 }
3615
3616
3617 /*
3618  * fp_getfkq
3619  *
3620  * Description: Get fileproc and kqueue pointer for a given fd from the
3621  *              per process open file table of the specified process, and if
3622  *              successful, increment the f_iocount
3623  *
3624  * Parameters:  p                               Process in which fd lives
3625  *              fd                              fd to get information for
3626  *              resultfp                        Pointer to result fileproc
3627  *                                              pointer area, or 0 if none
3628  *              resultkq                        Pointer to result kqueue
3629  *                                              pointer area, or 0 if none
3630  *
3631  * Returns:     EBADF                   The file descriptor is invalid
3632  *              EBADF                   The file descriptor is not a socket
3633  *              0                       Success
3634  *
3635  * Implicit returns:
3636  *              *resultfp (modified)            Fileproc pointer
3637  *              *resultkq (modified)            kqueue pointer
3638  *
3639  * Notes:       The second EBADF should probably be something else to make
3640  *              the error condition distinct.
3641  */
3642 int
3643 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3644           struct kqueue **resultkq)
3645 {
3646         struct filedesc *fdp = p->p_fd;
3647         struct fileproc *fp;
3648
3649         proc_fdlock_spin(p);
3650         if ( fd < 0 || fd >= fdp->fd_nfiles ||
3651                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3652                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3653                 proc_fdunlock(p);
3654                 return (EBADF);
3655         }
3656         if (fp->f_type != DTYPE_KQUEUE) {
3657                 proc_fdunlock(p);
3658                 return(EBADF);
3659         }
3660         fp->f_iocount++;
3661
3662         if (resultfp)
3663                 *resultfp = fp;
3664         if (resultkq)
3665                 *resultkq = (struct kqueue *)fp->f_data;
3666         proc_fdunlock(p);
3667
3668         return (0);
3669 }
3670
3671
3672 /*
3673  * fp_getfpshm
3674  *
3675  * Description: Get fileproc and POSIX shared memory pointer for a given fd
3676  *              from the per process open file table of the specified process
3677  *              and if successful, increment the f_iocount
3678  *
3679  * Parameters:  p                               Process in which fd lives
3680  *              fd                              fd to get information for
3681  *              resultfp                        Pointer to result fileproc
3682  *                                              pointer area, or 0 if none
3683  *              resultpshm                      Pointer to result POSIX
3684  *                                              shared memory pointer
3685  *                                              pointer area, or 0 if none
3686  *
3687  * Returns:     EBADF                   The file descriptor is invalid
3688  *              EBADF                   The file descriptor is not a POSIX
3689  *                                      shared memory area
3690  *              0                       Success
3691  *
3692  * Implicit returns:
3693  *              *resultfp (modified)            Fileproc pointer
3694  *              *resultpshm (modified)          POSIX shared memory pointer
3695  *
3696  * Notes:       The second EBADF should probably be something else to make
3697  *              the error condition distinct.
3698  */
3699 int
3700 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3701             struct pshmnode **resultpshm)
3702 {
3703         struct filedesc *fdp = p->p_fd;
3704         struct fileproc *fp;
3705
3706         proc_fdlock_spin(p);
3707         if (fd < 0 || fd >= fdp->fd_nfiles ||
3708                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3709                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3710                 proc_fdunlock(p);
3711                 return (EBADF);
3712         }
3713         if (fp->f_type != DTYPE_PSXSHM) {
3714
3715                 proc_fdunlock(p);
3716                 return(EBADF);
3717         }
3718         fp->f_iocount++;
3719
3720         if (resultfp)
3721                 *resultfp = fp;
3722         if (resultpshm)
3723                 *resultpshm = (struct pshmnode *)fp->f_data;
3724         proc_fdunlock(p);
3725
3726         return (0);
3727 }
3728
3729
3730 /*
3731  * fp_getfsem
3732  *
3733  * Description: Get fileproc and POSIX semaphore pointer for a given fd from
3734  *              the per process open file table of the specified process
3735  *              and if successful, increment the f_iocount
3736  *
3737  * Parameters:  p                               Process in which fd lives
3738  *              fd                              fd to get information for
3739  *              resultfp                        Pointer to result fileproc
3740  *                                              pointer area, or 0 if none
3741  *              resultpsem                      Pointer to result POSIX
3742  *                                              semaphore pointer area, or
3743  *                                              0 if none
3744  *
3745  * Returns:     EBADF                   The file descriptor is invalid
3746  *              EBADF                   The file descriptor is not a POSIX
3747  *                                      semaphore
3748  *              0                       Success
3749  *
3750  * Implicit returns:
3751  *              *resultfp (modified)            Fileproc pointer
3752  *              *resultpsem (modified)          POSIX semaphore pointer
3753  *
3754  * Notes:       The second EBADF should probably be something else to make
3755  *              the error condition distinct.
3756  *
3757  *              In order to support unnamed POSIX semaphores, the named
3758  *              POSIX semaphores will have to move out of the per-process
3759  *              open filetable, and into a global table that is shared with
3760  *              unnamed POSIX semaphores, since unnamed POSIX semaphores
3761  *              are typically used by declaring instances in shared memory,
3762  *              and there's no other way to do this without changing the
3763  *              underlying type, which would introduce binary compatibility
3764  *              issues.
3765  */
3766 int
3767 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3768             struct psemnode **resultpsem)
3769 {
3770         struct filedesc *fdp = p->p_fd;
3771         struct fileproc *fp;
3772
3773         proc_fdlock_spin(p);
3774         if (fd < 0 || fd >= fdp->fd_nfiles ||
3775                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3776                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3777                 proc_fdunlock(p);
3778                 return (EBADF);
3779         }
3780         if (fp->f_type != DTYPE_PSXSEM) {
3781                 proc_fdunlock(p);
3782                 return(EBADF);
3783         }
3784         fp->f_iocount++;
3785
3786         if (resultfp)
3787                 *resultfp = fp;
3788         if (resultpsem)
3789                 *resultpsem = (struct psemnode *)fp->f_data;
3790         proc_fdunlock(p);
3791
3792         return (0);
3793 }
3794
3795
3796 /*
3797  * fp_getfpipe
3798  *
3799  * Description: Get fileproc and pipe pointer for a given fd from the
3800  *              per process open file table of the specified process
3801  *              and if successful, increment the f_iocount
3802  *
3803  * Parameters:  p                               Process in which fd lives
3804  *              fd                              fd to get information for
3805  *              resultfp                        Pointer to result fileproc
3806  *                                              pointer area, or 0 if none
3807  *              resultpipe                      Pointer to result pipe
3808  *                                              pointer area, or 0 if none
3809  *
3810  * Returns:     EBADF                   The file descriptor is invalid
3811  *              EBADF                   The file descriptor is not a socket
3812  *              0                       Success
3813  *
3814  * Implicit returns:
3815  *              *resultfp (modified)            Fileproc pointer
3816  *              *resultpipe (modified)          pipe pointer
3817  *
3818  * Notes:       The second EBADF should probably be something else to make
3819  *              the error condition distinct.
3820  */
3821 int
3822 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
3823             struct pipe **resultpipe)
3824 {
3825         struct filedesc *fdp = p->p_fd;
3826         struct fileproc *fp;
3827
3828         proc_fdlock_spin(p);
3829         if (fd < 0 || fd >= fdp->fd_nfiles ||
3830                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3831                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3832                 proc_fdunlock(p);
3833                 return (EBADF);
3834         }
3835         if (fp->f_type != DTYPE_PIPE) {
3836                 proc_fdunlock(p);
3837                 return(EBADF);
3838         }
3839         fp->f_iocount++;
3840
3841         if (resultfp)
3842                 *resultfp = fp;
3843         if (resultpipe)
3844                 *resultpipe = (struct pipe *)fp->f_data;
3845         proc_fdunlock(p);
3846
3847         return (0);
3848 }
3849
3850 /*
3851  * fp_lookup
3852  *
3853  * Description: Get fileproc pointer for a given fd from the per process
3854  *              open file table of the specified process and if successful,
3855  *              increment the f_iocount
3856  *
3857  * Parameters:  p                               Process in which fd lives
3858  *              fd                              fd to get information for
3859  *              resultfp                        Pointer to result fileproc
3860  *                                              pointer area, or 0 if none
3861  *              locked                          !0 if the caller holds the
3862  *                                              proc_fdlock, 0 otherwise
3863  *
3864  * Returns:     0                       Success
3865  *              EBADF                   Bad file descriptor
3866  *
3867  * Implicit returns:
3868  *              *resultfp (modified)            Fileproc pointer
3869  *
3870  * Locks:       If the argument 'locked' is non-zero, then the caller is
3871  *              expected to have taken and held the proc_fdlock; if it is
3872  *              zero, than this routine internally takes and drops this lock.
3873  */
3874 int
3875 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
3876 {
3877         struct filedesc *fdp = p->p_fd;
3878         struct fileproc *fp;
3879
3880         if (!locked)
3881                 proc_fdlock_spin(p);
3882         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
3883                         (fp = fdp->fd_ofiles[fd]) == NULL ||
3884                         (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3885                 if (!locked)
3886                         proc_fdunlock(p);
3887                 return (EBADF);
3888         }
3889         fp->f_iocount++;
3890
3891         if (resultfp)
3892                 *resultfp = fp;
3893         if (!locked)
3894                 proc_fdunlock(p);
3895
3896         return (0);
3897 }
3898
3899
3900 /*
3901  * fp_tryswap
3902  *
3903  * Description: Swap the fileproc pointer for a given fd with a new
3904  *              fileproc pointer in the per-process open file table of
3905  *              the specified process.  The fdlock must be held at entry.
3906  *
3907  * Parameters:  p               Process containing the fd
3908  *              fd              The fd of interest
3909  *              nfp             Pointer to the newfp
3910  *
3911  * Returns:     0               Success
3912  *              EBADF           Bad file descriptor
3913  *              EINTR           Interrupted
3914  *              EKEEPLOOKING    f_iocount changed while lock was dropped.
3915  */
3916 int
3917 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
3918 {
3919         struct fileproc *fp;
3920         int error;
3921
3922         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3923
3924         if (0 != (error = fp_lookup(p, fd, &fp, 1)))
3925                 return (error);
3926         /*
3927          * At this point, our caller (change_guardedfd_np) has
3928          * one f_iocount reference, and we just took another
3929          * one to begin the replacement.
3930          */
3931         if (fp->f_iocount < 2) {
3932                 panic("f_iocount too small %d", fp->f_iocount);
3933         } else if (2 == fp->f_iocount) {
3934
3935                 /* Copy the contents of *fp, preserving the "type" of *nfp */
3936
3937                 nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) |
3938                         (fp->f_flags & ~FP_TYPEMASK);
3939                 nfp->f_iocount = fp->f_iocount;
3940                 nfp->f_fglob = fp->f_fglob;
3941                 nfp->f_wset = fp->f_wset;
3942
3943                 p->p_fd->fd_ofiles[fd] = nfp;
3944                 (void) fp_drop(p, fd, nfp, 1);
3945         } else {
3946                 /*
3947                  * Wait for all other active references to evaporate.
3948                  */
3949                 p->p_fpdrainwait = 1;
3950                 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
3951                     PRIBIO | PCATCH, "tryswap fpdrain", NULL);
3952                 if (0 == error) {
3953                         /*
3954                          * Return an "internal" errno to trigger a full
3955                          * reevaluation of the change-guard attempt.
3956                          */
3957                         error = EKEEPLOOKING;
3958                         printf("%s: lookup collision fd %d\n", __func__, fd);
3959                 }
3960                 (void) fp_drop(p, fd, fp, 1);
3961         }
3962         return (error);
3963 }
3964
3965
3966 /*
3967  * fp_drop_written
3968  *
3969  * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
3970  *              reference previously taken by calling fp_lookup et. al.
3971  *
3972  * Parameters:  p                               Process in which the fd lives
3973  *              fd                              fd associated with the fileproc
3974  *              fp                              fileproc on which to set the
3975  *                                              flag and drop the reference
3976  *
3977  * Returns:     0                               Success
3978  *      fp_drop:EBADF                           Bad file descriptor
3979  *
3980  * Locks:       This function internally takes and drops the proc_fdlock for
3981  *              the supplied process
3982  *
3983  * Notes:       The fileproc must correspond to the fd in the supplied proc
3984  */
3985 int
3986 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
3987 {
3988         int error;
3989
3990         proc_fdlock_spin(p);
3991
3992         fp->f_flags |= FP_WRITTEN;
3993
3994         error = fp_drop(p, fd, fp, 1);
3995
3996         proc_fdunlock(p);
3997
3998         return (error);
3999 }
4000
4001
4002 /*
4003  * fp_drop_event
4004  *
4005  * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
4006  *              reference previously taken by calling fp_lookup et. al.
4007  *
4008  * Parameters:  p                               Process in which the fd lives
4009  *              fd                              fd associated with the fileproc
4010  *              fp                              fileproc on which to set the
4011  *                                              flag and drop the reference
4012  *
4013  * Returns:     0                               Success
4014  *      fp_drop:EBADF                           Bad file descriptor
4015  *
4016  * Locks:       This function internally takes and drops the proc_fdlock for
4017  *              the supplied process
4018  *
4019  * Notes:       The fileproc must correspond to the fd in the supplied proc
4020  */
4021 int
4022 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
4023 {
4024         int error;
4025
4026         proc_fdlock_spin(p);
4027
4028         fp->f_flags |= FP_WAITEVENT;
4029
4030         error = fp_drop(p, fd, fp, 1);
4031
4032         proc_fdunlock(p);
4033
4034         return (error);
4035 }
4036
4037
4038 /*
4039  * fp_drop
4040  *
4041  * Description: Drop the I/O reference previously taken by calling fp_lookup
4042  *              et. al.
4043  *
4044  * Parameters:  p                               Process in which the fd lives
4045  *              fd                              fd associated with the fileproc
4046  *              fp                              fileproc on which to set the
4047  *                                              flag and drop the reference
4048  *              locked                          flag to internally take and
4049  *                                              drop proc_fdlock if it is not
4050  *                                              already held by the caller
4051  *
4052  * Returns:     0                               Success
4053  *              EBADF                           Bad file descriptor
4054  *
4055  * Locks:       This function internally takes and drops the proc_fdlock for
4056  *              the supplied process if 'locked' is non-zero, and assumes that
4057  *              the caller already holds this lock if 'locked' is non-zero.
4058  *
4059  * Notes:       The fileproc must correspond to the fd in the supplied proc
4060  */
4061 int
4062 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4063 {
4064         struct filedesc *fdp = p->p_fd;
4065         int     needwakeup = 0;
4066
4067         if (!locked)
4068                 proc_fdlock_spin(p);
4069          if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4070                         (fp = fdp->fd_ofiles[fd]) == NULL ||
4071                         ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4072                          !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4073                 if (!locked)
4074                         proc_fdunlock(p);
4075                 return (EBADF);
4076         }
4077         fp->f_iocount--;
4078
4079         if (fp->f_iocount == 0) {
4080                 if (fp->f_flags & FP_SELCONFLICT)
4081                         fp->f_flags &= ~FP_SELCONFLICT;
4082
4083                 if (p->p_fpdrainwait) {
4084                         p->p_fpdrainwait = 0;
4085                         needwakeup = 1;
4086                 }
4087         }
4088         if (!locked)
4089                 proc_fdunlock(p);
4090         if (needwakeup)
4091                 wakeup(&p->p_fpdrainwait);
4092
4093         return (0);
4094 }
4095
4096
4097 /*
4098  * file_vnode
4099  *
4100  * Description: Given an fd, look it up in the current process's per process
4101  *              open file table, and return its internal vnode pointer.
4102  *
4103  * Parameters:  fd                              fd to obtain vnode from
4104  *              vpp                             pointer to vnode return area
4105  *
4106  * Returns:     0                               Success
4107  *              EINVAL                          The fd does not refer to a
4108  *                                              vnode fileproc entry
4109  *      fp_lookup:EBADF                         Bad file descriptor
4110  *
4111  * Implicit returns:
4112  *              *vpp (modified)                 Returned vnode pointer
4113  *
4114  * Locks:       This function internally takes and drops the proc_fdlock for
4115  *              the current process
4116  *
4117  * Notes:       If successful, this function increments the f_iocount on the
4118  *              fd's corresponding fileproc.
4119  *
4120  *              The fileproc referenced is not returned; because of this, care
4121  *              must be taken to not drop the last reference (e.g. by closing
4122  *              the file).  This is inherently unsafe, since the reference may
4123  *              not be recoverable from the vnode, if there is a subsequent
4124  *              close that destroys the associate fileproc.  The caller should
4125  *              therefore retain their own reference on the fileproc so that
4126  *              the f_iocount can be dropped subsequently.  Failure to do this
4127  *              can result in the returned pointer immediately becoming invalid
4128  *              following the call.
4129  *
4130  *              Use of this function is discouraged.
4131  */
4132 int
4133 file_vnode(int fd, struct vnode **vpp)
4134 {
4135         proc_t p = current_proc();
4136         struct fileproc *fp;
4137         int error;
4138
4139         proc_fdlock_spin(p);
4140         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
4141                 proc_fdunlock(p);
4142                 return(error);
4143         }
4144         if (fp->f_type != DTYPE_VNODE) {
4145                 fp_drop(p, fd, fp,1);
4146                 proc_fdunlock(p);
4147                 return(EINVAL);
4148         }
4149         if (vpp != NULL)
4150                 *vpp = (struct vnode *)fp->f_data;
4151         proc_fdunlock(p);
4152
4153         return(0);
4154 }
4155
4156
4157 /*
4158  * file_vnode_withvid
4159  *
4160  * Description: Given an fd, look it up in the current process's per process
4161  *              open file table, and return its internal vnode pointer.
4162  *
4163  * Parameters:  fd                              fd to obtain vnode from
4164  *              vpp                             pointer to vnode return area
4165  *              vidp                            pointer to vid of the returned vnode
4166  *
4167  * Returns:     0                               Success
4168  *              EINVAL                          The fd does not refer to a
4169  *                                              vnode fileproc entry
4170  *      fp_lookup:EBADF                         Bad file descriptor
4171  *
4172  * Implicit returns:
4173  *              *vpp (modified)                 Returned vnode pointer
4174  *
4175  * Locks:       This function internally takes and drops the proc_fdlock for
4176  *              the current process
4177  *
4178  * Notes:       If successful, this function increments the f_iocount on the
4179  *              fd's corresponding fileproc.
4180  *
4181  *              The fileproc referenced is not returned; because of this, care
4182  *              must be taken to not drop the last reference (e.g. by closing
4183  *              the file).  This is inherently unsafe, since the reference may
4184  *              not be recoverable from the vnode, if there is a subsequent
4185  *              close that destroys the associate fileproc.  The caller should
4186  *              therefore retain their own reference on the fileproc so that
4187  *              the f_iocount can be dropped subsequently.  Failure to do this
4188  *              can result in the returned pointer immediately becoming invalid
4189  *              following the call.
4190  *
4191  *              Use of this function is discouraged.
4192  */
4193 int
4194 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
4195 {
4196         proc_t p = current_proc();
4197         struct fileproc *fp;
4198         vnode_t vp;
4199         int error;
4200
4201         proc_fdlock_spin(p);
4202         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
4203                 proc_fdunlock(p);
4204                 return(error);
4205         }
4206         if (fp->f_type != DTYPE_VNODE) {
4207                 fp_drop(p, fd, fp,1);
4208                 proc_fdunlock(p);
4209                 return(EINVAL);
4210         }
4211         vp = (struct vnode *)fp->f_data;
4212         if (vpp != NULL)
4213                 *vpp = vp;
4214
4215         if ((vidp != NULL) && (vp != NULLVP))
4216                 *vidp = (uint32_t)vp->v_id;
4217
4218         proc_fdunlock(p);
4219
4220         return(0);
4221 }
4222
4223
4224 /*
4225  * file_socket
4226  *
4227  * Description: Given an fd, look it up in the current process's per process
4228  *              open file table, and return its internal socket pointer.
4229  *
4230  * Parameters:  fd                              fd to obtain vnode from
4231  *              sp                              pointer to socket return area
4232  *
4233  * Returns:     0                               Success
4234  *              ENOTSOCK                        Not a socket
4235  *              fp_lookup:EBADF                 Bad file descriptor
4236  *
4237  * Implicit returns:
4238  *              *sp (modified)                  Returned socket pointer
4239  *
4240  * Locks:       This function internally takes and drops the proc_fdlock for
4241  *              the current process
4242  *
4243  * Notes:       If successful, this function increments the f_iocount on the
4244  *              fd's corresponding fileproc.
4245  *
4246  *              The fileproc referenced is not returned; because of this, care
4247  *              must be taken to not drop the last reference (e.g. by closing
4248  *              the file).  This is inherently unsafe, since the reference may
4249  *              not be recoverable from the socket, if there is a subsequent
4250  *              close that destroys the associate fileproc.  The caller should
4251  *              therefore retain their own reference on the fileproc so that
4252  *              the f_iocount can be dropped subsequently.  Failure to do this
4253  *              can result in the returned pointer immediately becoming invalid
4254  *              following the call.
4255  *
4256  *              Use of this function is discouraged.
4257  */
4258 int
4259 file_socket(int fd, struct socket **sp)
4260 {
4261         proc_t p = current_proc();
4262         struct fileproc *fp;
4263         int error;
4264
4265         proc_fdlock_spin(p);
4266         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
4267                 proc_fdunlock(p);
4268                 return(error);
4269         }
4270         if (fp->f_type != DTYPE_SOCKET) {
4271                 fp_drop(p, fd, fp,1);
4272                 proc_fdunlock(p);
4273                 return(ENOTSOCK);
4274         }
4275         *sp = (struct socket *)fp->f_data;
4276         proc_fdunlock(p);
4277
4278         return(0);
4279 }
4280
4281
4282 /*
4283  * file_flags
4284  *
4285  * Description: Given an fd, look it up in the current process's per process
4286  *              open file table, and return its fileproc's flags field.
4287  *
4288  * Parameters:  fd                              fd whose flags are to be
4289  *                                              retrieved
4290  *              flags                           pointer to flags data area
4291  *
4292  * Returns:     0                               Success
4293  *              ENOTSOCK                        Not a socket
4294  *              fp_lookup:EBADF                 Bad file descriptor
4295  *
4296  * Implicit returns:
4297  *              *flags (modified)               Returned flags field
4298  *
4299  * Locks:       This function internally takes and drops the proc_fdlock for
4300  *              the current process
4301  *
4302  * Notes:       This function will internally increment and decrement the
4303  *              f_iocount of the fileproc as part of its operation.
4304  */
4305 int
4306 file_flags(int fd, int *flags)
4307 {
4308
4309         proc_t p = current_proc();
4310         struct fileproc *fp;
4311         int error;
4312
4313         proc_fdlock_spin(p);
4314         if ( (error = fp_lookup(p, fd, &fp, 1)) ) {
4315                 proc_fdunlock(p);
4316                 return(error);
4317         }
4318         *flags = (int)fp->f_flag;
4319         fp_drop(p, fd, fp,1);
4320         proc_fdunlock(p);
4321
4322         return(0);
4323 }
4324
4325
4326 /*
4327  * file_drop
4328  *
4329  * Description: Drop an iocount reference on an fd, and wake up any waiters
4330  *              for draining (i.e. blocked in fileproc_drain() called during
4331  *              the last attempt to close a file).
4332  *
4333  * Parameters:  fd                              fd on which an ioreference is
4334  *                                              to be dropped
4335  *
4336  * Returns:     0                               Success
4337  *              EBADF                           Bad file descriptor
4338  *
4339  * Description: Given an fd, look it up in the current process's per process
4340  *              open file table, and drop it's fileproc's f_iocount by one
4341  *
4342  * Notes:       This is intended as a corresponding operation to the functions
4343  *              file_vnode() and file_socket() operations.
4344  *
4345  *              Technically, the close reference is supposed to be protected
4346  *              by a fileproc_drain(), however, a drain will only block if
4347  *              the fd refers to a character device, and that device has had
4348  *              preparefileread() called on it.  If it refers to something
4349  *              other than a character device, then the drain will occur and
4350  *              block each close attempt, rather than merely the last close.
4351  *
4352  *              Since it's possible for an fd that refers to a character
4353  *              device to have an intermediate close followed by an open to
4354  *              cause a different file to correspond to that descriptor,
4355  *              unless there was a cautionary reference taken on the fileproc,
4356  *              this is an inherently unsafe function.  This happens in the
4357  *              case where multiple fd's in a process refer to the same
4358  *              character device (e.g. stdin/out/err pointing to a tty, etc.).
4359  *
4360  *              Use of this function is discouraged.
4361  */
4362 int
4363 file_drop(int fd)
4364 {
4365         struct fileproc *fp;
4366         proc_t p = current_proc();
4367         int     needwakeup = 0;
4368
4369         proc_fdlock_spin(p);
4370         if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
4371                         (fp = p->p_fd->fd_ofiles[fd]) == NULL ||
4372                         ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
4373                          !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
4374                 proc_fdunlock(p);
4375                 return (EBADF);
4376         }
4377         fp->f_iocount --;
4378
4379         if (fp->f_iocount == 0) {
4380                 if (fp->f_flags & FP_SELCONFLICT)
4381                         fp->f_flags &= ~FP_SELCONFLICT;
4382
4383                 if (p->p_fpdrainwait) {
4384                         p->p_fpdrainwait = 0;
4385                         needwakeup = 1;
4386                 }
4387         }
4388         proc_fdunlock(p);
4389
4390         if (needwakeup)
4391                 wakeup(&p->p_fpdrainwait);
4392         return(0);
4393 }
4394
4395
4396 static int falloc_withalloc_locked(proc_t, struct fileproc **, int *,
4397     vfs_context_t, struct fileproc * (*)(void *), void *, int);
4398
4399 /*
4400  * falloc
4401  *
4402  * Description: Allocate an entry in the per process open file table and
4403  *              return the corresponding fileproc and fd.
4404  *
4405  * Parameters:  p                               The process in whose open file
4406  *                                              table the fd is to be allocated
4407  *              resultfp                        Pointer to fileproc pointer
4408  *                                              return area
4409  *              resultfd                        Pointer to fd return area
4410  *              ctx                             VFS context
4411  *
4412  * Returns:     0                               Success
4413  *      falloc:ENFILE                           Too many open files in system
4414  *      falloc:EMFILE                           Too many open files in process
4415  *      falloc:ENOMEM                           M_FILEPROC or M_FILEGLOB zone
4416  *                                              exhausted
4417  *
4418  * Implicit returns:
4419  *              *resultfd (modified)            Returned fileproc pointer
4420  *              *resultfd (modified)            Returned fd
4421  *
4422  * Locks:       This function takes and drops the proc_fdlock; if this lock
4423  *              is already held, use falloc_locked() instead.
4424  *
4425  * Notes:       This function takes separate process and context arguments
4426  *              solely to support kern_exec.c; otherwise, it would take
4427  *              neither, and expect falloc_locked() to use the
4428  *              vfs_context_current() routine internally.
4429  */
4430 int
4431 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4432 {
4433         return (falloc_withalloc(p, resultfp, resultfd, ctx,
4434             fileproc_alloc_init, NULL));
4435 }
4436
4437 /*
4438  * Like falloc, but including the fileproc allocator and create-args
4439  */
4440 int
4441 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4442     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg)
4443 {
4444         int error;
4445
4446         proc_fdlock(p);
4447         error = falloc_withalloc_locked(p,
4448             resultfp, resultfd, ctx, fp_zalloc, arg, 1);
4449         proc_fdunlock(p);
4450
4451         return (error);
4452 }
4453
4454 /*
4455  * "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists
      *
      * Declared here without an initializer.  falloc_withalloc_locked()
      * stores the address of this object in the fg_ops field of every
      * fileglob it creates.
      *
      * NOTE(review): presumably whoever finishes setting up the new file
      * replaces fg_ops with the real operations table -- confirm against the
      * callers of the falloc*() routines.
4456  */
4457 static const struct fileops uninitops;
4458
4459 /*
4460  * falloc_locked
4461  *
4462  * Create a new open file structure and allocate
4463  * a file descriptor for the process that refers to it.
4464  *
4465  * Returns:     0                       Success
4466  *
4467  * Description: Allocate an entry in the per process open file table and
4468  *              return the corresponding fileproc and fd.
4469  *
4470  * Parameters:  p                               The process in whose open file
4471  *                                              table the fd is to be allocated
4472  *              resultfp                        Pointer to fileproc pointer
4473  *                                              return area
4474  *              resultfd                        Pointer to fd return area
4475  *              ctx                             VFS context
4476  *              locked                          Flag to indicate whether the
4477  *                                              caller holds proc_fdlock
4478  *
4479  * Returns:     0                               Success
4480  *              ENFILE                          Too many open files in system
4481  *              fdalloc:EMFILE                  Too many open files in process
4482  *              ENOMEM                          M_FILEPROC or M_FILEGLOB zone
4483  *                                              exhausted
4484  *      fdalloc:ENOMEM
4485  *
4486  * Implicit returns:
4487  *              *resultfd (modified)            Returned fileproc pointer
4488  *              *resultfd (modified)            Returned fd
4489  *
4490  * Locks:       If the parameter 'locked' is zero, this function takes and
4491  *              drops the proc_fdlock; if non-zero, the caller must hold the
4492  *              lock.
4493  *
4494  * Notes:       If you intend to use a non-zero 'locked' parameter, use the
4495  *              utility function falloc() instead.
4496  *
4497  *              This function takes separate process and context arguments
4498  *              solely to support kern_exec.c; otherwise, it would take
4499  *              neither, and use the vfs_context_current() routine internally.
4500  */
4501 int
4502 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4503               vfs_context_t ctx, int locked)
4504 {
4505         return (falloc_withalloc_locked(p, resultfp, resultfd, ctx,
4506             fileproc_alloc_init, NULL, locked));
4507 }
4508
4509 static int
4510 falloc_withalloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4511     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg,
4512     int locked)
4513 {
4514         struct fileproc *fp;
4515         struct fileglob *fg;
4516         int error, nfd;
4517
4518         if (!locked)
4519                 proc_fdlock(p);
4520         if ( (error = fdalloc(p, 0, &nfd)) ) {
4521                 if (!locked)
4522                         proc_fdunlock(p);
4523                 return (error);
4524         }
4525         if (nfiles >= maxfiles) {
4526                 if (!locked)
4527                         proc_fdunlock(p);
4528                 tablefull("file");
4529                 return (ENFILE);
4530         }
4531 #if CONFIG_MACF
4532         error = mac_file_check_create(proc_ucred(p));
4533         if (error) {
4534                 if (!locked)
4535                         proc_fdunlock(p);
4536                 return (error);
4537         }
4538 #endif
4539
4540         /*
4541          * Allocate a new file descriptor.
4542          * If the process has file descriptor zero open, add to the list
4543          * of open files at that point, otherwise put it at the front of
4544          * the list of open files.
4545          */
4546         proc_fdunlock(p);
4547
4548         fp = (*fp_zalloc)(crarg);
4549         if (fp == NULL) {
4550                 if (locked)
4551                         proc_fdlock(p);
4552                 return (ENOMEM);
4553         }
4554         MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4555         if (fg == NULL) {
4556                 fileproc_free(fp);
4557                 if (locked)
4558                         proc_fdlock(p);
4559                 return (ENOMEM);
4560         }
4561         bzero(fg, sizeof(struct fileglob));
4562         lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4563
4564         fp->f_iocount = 1;
4565         fg->fg_count = 1;
4566         fg->fg_ops = &uninitops;
4567         fp->f_fglob = fg;
4568 #if CONFIG_MACF
4569         mac_file_label_init(fg);
4570 #endif
4571
4572         kauth_cred_ref(ctx->vc_ucred);
4573
4574         proc_fdlock(p);
4575
4576         fp->f_cred = ctx->vc_ucred;
4577
4578 #if CONFIG_MACF
4579         mac_file_label_associate(fp->f_cred, fg);
4580 #endif
4581
4582         OSAddAtomic(1, &nfiles);
4583
4584         p->p_fd->fd_ofiles[nfd] = fp;
4585
4586         if (!locked)
4587                 proc_fdunlock(p);
4588
4589         if (resultfp)
4590                 *resultfp = fp;
4591         if (resultfd)
4592                 *resultfd = nfd;
4593
4594         return (0);
4595 }
4596
4597
4598 /*
4599  * fg_free
4600  *
4601  * Description: Free a file structure; drop the global open file count, and
4602  *              drop the credential reference, if the fileglob has one, and
4603  *              destroy the instance mutex before freeing
4604  *
4605  * Parameters:  fg                              Pointer to fileglob to be
4606  *                                              freed
4607  *
4608  * Returns:     void
4609  */
4610 void
4611 fg_free(struct fileglob *fg)
4612 {
4613         OSAddAtomic(-1, &nfiles);
4614
4615         if (fg->fg_vn_data) {
4616                 fg_vn_data_free(fg->fg_vn_data);
4617                 fg->fg_vn_data = NULL;
4618         }
4619
4620         if (IS_VALID_CRED(fg->fg_cred)) {
4621                 kauth_cred_unref(&fg->fg_cred);
4622         }
4623         lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4624
4625 #if CONFIG_MACF
4626         mac_file_label_destroy(fg);
4627 #endif
4628         FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4629 }
4630
4631
4632 /*
4633  * fdexec
4634  *
4635  * Description: Perform close-on-exec processing for all files in a process
4636  *              that are either marked as close-on-exec, or which were in the
4637  *              process of being opened at the time of the execve
4638  *
4639  *              Also handles the case (via posix_spawn()) where -all-
4640  *              files except those marked with "inherit" as treated as
4641  *              close-on-exec.
4642  *
4643  * Parameters:  p                               Pointer to process calling
4644  *                                              execve
4645  *
4646  * Returns:     void
4647  *
4648  * Locks:       This function internally takes and drops proc_fdlock()
4649  *
4650  */
4651 void
4652 fdexec(proc_t p, short flags)
4653 {
4654         struct filedesc *fdp = p->p_fd;
4655         int i;
4656         boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4657
4658         proc_fdlock(p);
4659         for (i = fdp->fd_lastfile; i >= 0; i--) {
4660
4661                 struct fileproc *fp = fdp->fd_ofiles[i];
4662                 char *flagp = &fdp->fd_ofileflags[i];
4663
4664                 if (fp && cloexec_default) {
4665                         /*
4666                          * Reverse the usual semantics of file descriptor
4667                          * inheritance - all of them should be closed
4668                          * except files marked explicitly as "inherit" and
4669                          * not marked close-on-exec.
4670                          */
4671                         if ((*flagp & (UF_EXCLOSE|UF_INHERIT)) != UF_INHERIT)
4672                                 *flagp |= UF_EXCLOSE;
4673                         *flagp &= ~UF_INHERIT;
4674                 }
4675
4676                 if (
4677                     ((*flagp & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
4678 #if CONFIG_MACF
4679                     || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4680 #endif
4681                 ) {
4682                         if (i < fdp->fd_knlistsize)
4683                                 knote_fdclose(p, i);
4684                         procfdtbl_clearfd(p, i);
4685                         if (i == fdp->fd_lastfile && i > 0)
4686                                 fdp->fd_lastfile--;
4687                         if (i < fdp->fd_freefile)
4688                                 fdp->fd_freefile = i;
4689
4690                         /*
4691                          * Wait for any third party viewers (e.g., lsof)
4692                          * to release their references to this fileproc.
4693                          */
4694                         while (fp->f_iocount > 0) {
4695                                 p->p_fpdrainwait = 1;
4696                                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4697                                     "fpdrain", NULL);
4698                         }
4699
4700                         closef_locked(fp, fp->f_fglob, p);
4701
4702                         fileproc_free(fp);
4703                 }
4704         }
4705         proc_fdunlock(p);
4706 }
4707
4708
4709 /*
4710  * fdcopy
4711  *
4712  * Description: Copy a filedesc structure.  This is normally used as part of
4713  *              forkproc() when forking a new process, to copy the per process
4714  *              open file table over to the new process.
4715  *
4716  * Parameters:  p                               Process whose open file table
4717  *                                              is to be copied (parent)
4718  *              uth_cdir                        Per thread current working
4719  *                                              directory, or NULL
4720  *
4721  * Returns:     NULL                            Copy failed
4722  *              !NULL                           Pointer to new struct filedesc
4723  *
4724  * Locks:       This function internally takes and drops proc_fdlock()
4725  *
4726  * Notes:       Files are copied directly, ignoring the new resource limits
4727  *              for the process that's being copied into.  Since the descriptor
4728  *              references are just additional references, this does not count
4729  *              against the number of open files on the system.
4730  *
4731  *              The struct filedesc includes the current working directory,
4732  *              and the current root directory, if the process is chroot'ed.
4733  *
4734  *              If the exec was called by a thread using a per thread current
4735  *              working directory, we inherit the working directory from the
4736  *              thread making the call, rather than from the process.
4737  *
4738  *              In the case of a failure to obtain a reference, for most cases,
4739  *              the file entry will be silently dropped.  There's an exception
4740  *              for the case of a chroot dir, since a failure to obtain a
4741  *              reference there would constitute an "escape" from the chroot
4742  *              environment, which must not be allowed.  In that case, we will
4743  *              deny the execve() operation, rather than allowing the escape.
4744  */
4745 struct filedesc *
4746 fdcopy(proc_t p, vnode_t uth_cdir)
4747 {
4748         struct filedesc *newfdp, *fdp = p->p_fd;
4749         int i;
4750         struct fileproc *ofp, *fp;
4751         vnode_t v_dir;
4752
4753         MALLOC_ZONE(newfdp, struct filedesc *,
4754                         sizeof(*newfdp), M_FILEDESC, M_WAITOK);
4755         if (newfdp == NULL)
4756                 return(NULL);
4757
4758         proc_fdlock(p);
4759
4760         /*
4761          * the FD_CHROOT flag will be inherited via this copy
4762          */
4763         (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4764
4765         /*
4766          * If we are running with per-thread current working directories,
4767          * inherit the new current working directory from the current thread
4768          * instead, before we take our references.
4769          */
4770         if (uth_cdir != NULLVP)
4771                 newfdp->fd_cdir = uth_cdir;
4772
4773         /*
4774          * For both fd_cdir and fd_rdir make sure we get
4775          * a valid reference... if we can't, then set
4776          * the pointer(s) to NULL in the child... this
4777          * will keep us from using a non-referenced vp
4778          * and allows us to do the vnode_rele only on
4779          * a properly referenced vp
4780          */
4781         if ( (v_dir = newfdp->fd_cdir) ) {
4782                 if (vnode_getwithref(v_dir) == 0) {
4783                         if ( (vnode_ref(v_dir)) )
4784                                 newfdp->fd_cdir = NULL;
4785                         vnode_put(v_dir);
4786                 } else
4787                         newfdp->fd_cdir = NULL;
4788         }
4789         if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4790                 /*
4791                  * we couldn't get a new reference on
4792                  * the current working directory being
4793                  * inherited... we might as well drop
4794                  * our reference from the parent also
4795                  * since the vnode has gone DEAD making
4796                  * it useless... by dropping it we'll
4797                  * be that much closer to recycling it
4798                  */
4799                 vnode_rele(fdp->fd_cdir);
4800                 fdp->fd_cdir = NULL;
4801         }
4802
4803         if ( (v_dir = newfdp->fd_rdir) ) {
4804                 if (vnode_getwithref(v_dir) == 0) {
4805                         if ( (vnode_ref(v_dir)) )
4806                                 newfdp->fd_rdir = NULL;
4807                         vnode_put(v_dir);
4808                 } else {
4809                         newfdp->fd_rdir = NULL;
4810                 }
4811         }
4812         /* Coming from a chroot environment and unable to get a reference... */
4813         if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4814                 /*
4815                  * We couldn't get a new reference on
4816                  * the chroot directory being
4817                  * inherited... this is fatal, since
4818                  * otherwise it would constitute an
4819                  * escape from a chroot environment by
4820                  * the new process.
4821                  */
4822                 if (newfdp->fd_cdir)
4823                         vnode_rele(newfdp->fd_cdir);
4824                 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
4825                 return(NULL);
4826         }
4827
4828         /*
4829          * If the number of open files fits in the internal arrays
4830          * of the open file structure, use them, otherwise allocate
4831          * additional memory for the number of descriptors currently
4832          * in use.
4833          */
4834         if (newfdp->fd_lastfile < NDFILE)
4835                 i = NDFILE;
4836         else {
4837                 /*
4838                  * Compute the smallest multiple of NDEXTENT needed
4839                  * for the file descriptors currently in use,
4840                  * allowing the table to shrink.
4841                  */
4842                 i = newfdp->fd_nfiles;
4843                 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2)
4844                         i /= 2;
4845         }
4846         proc_fdunlock(p);
4847
4848         MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
4849                                 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4850         if (newfdp->fd_ofiles == NULL) {
4851                 if (newfdp->fd_cdir)
4852                         vnode_rele(newfdp->fd_cdir);
4853                 if (newfdp->fd_rdir)
4854                         vnode_rele(newfdp->fd_rdir);
4855
4856                 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
4857                 return(NULL);
4858         }
4859         (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4860         proc_fdlock(p);
4861
4862         newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4863         newfdp->fd_nfiles = i;
4864
4865         if (fdp->fd_nfiles > 0) {
4866                 struct fileproc **fpp;
4867                 char *flags;
4868
4869                 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4870                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4871                 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4872                                         (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4873
4874                 /*
4875                  * kq descriptors cannot be copied.
4876                  */
4877                 if (newfdp->fd_knlistsize != -1) {
4878                         fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4879                         flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4880                         for (i = newfdp->fd_lastfile;
4881                             i >= 0; i--, fpp--, flags--) {
4882                                 if (*flags & UF_RESERVED)
4883                                         continue;       /* (removed below) */
4884                                 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
4885                                         *fpp = NULL;
4886                                         *flags = 0;
4887                                         if (i < newfdp->fd_freefile)
4888                                                 newfdp->fd_freefile = i;
4889                                 }
4890                                 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
4891                                         newfdp->fd_lastfile--;
4892                         }
4893                         newfdp->fd_knlist = NULL;
4894                         newfdp->fd_knlistsize = -1;
4895                         newfdp->fd_knhash = NULL;
4896                         newfdp->fd_knhashmask = 0;
4897                 }
4898                 fpp = newfdp->fd_ofiles;
4899                 flags = newfdp->fd_ofileflags;
4900
4901                 for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++)
4902                         if ((ofp = *fpp) != NULL &&
4903                             0 == (ofp->f_fglob->fg_lflags & FG_CONFINED) &&
4904                             0 == (*flags & (UF_FORKCLOSE|UF_RESERVED))) {
4905 #if DEBUG
4906                                 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE)
4907                                         panic("complex fileproc");
4908 #endif
4909                                 fp = fileproc_alloc_init(NULL);
4910                                 if (fp == NULL) {
4911                                         /*
4912                                          * XXX no room to copy, unable to
4913                                          * XXX safely unwind state at present
4914                                          */
4915                                         *fpp = NULL;
4916                                 } else {
4917                                         fp->f_flags |=
4918                                             (ofp->f_flags & ~FP_TYPEMASK);
4919                                         fp->f_fglob = ofp->f_fglob;
4920                                         (void)fg_ref(fp);
4921                                         *fpp = fp;
4922                                 }
4923                         } else {
4924                                 if (i < newfdp->fd_freefile)
4925                                         newfdp->fd_freefile = i;
4926                                 *fpp = NULL;
4927                                 *flags = 0;
4928                         }
4929         }
4930
4931         proc_fdunlock(p);
4932         return (newfdp);
4933 }
4934
4935
4936 /*
4937  * fdfree
4938  *
4939  * Description: Release a filedesc (per process open file table) structure;
4940  *              this is done on process exit(), or from forkproc_free() if
4941  *              the fork fails for some reason subsequent to a successful
4942  *              call to fdcopy()
4943  *
4944  * Parameters:  p                               Pointer to process going away
4945  *
4946  * Returns:     void
4947  *
4948  * Locks:       This function internally takes and drops proc_fdlock()
4949  */
4950 void
4951 fdfree(proc_t p)
4952 {
4953         struct filedesc *fdp;
4954         struct fileproc *fp;
4955         int i;
4956
4957         proc_fdlock(p);
4958
4959         if (p == kernproc || NULL == (fdp = p->p_fd)) {
4960                 proc_fdunlock(p);
4961                 return;
4962         }
4963
4964         extern struct filedesc filedesc0;
4965
4966         if (&filedesc0 == fdp)
4967                 panic("filedesc0");
4968
4969         if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
4970                 for (i = fdp->fd_lastfile; i >= 0; i--) {
4971                         if ((fp = fdp->fd_ofiles[i]) != NULL) {
4972
4973                           if (fdp->fd_ofileflags[i] & UF_RESERVED)
4974                                 panic("fdfree: found fp with UF_RESERVED");
4975
4976                                 procfdtbl_reservefd(p, i);
4977
4978                                 if (i < fdp->fd_knlistsize)
4979                                         knote_fdclose(p, i);
4980                                 if (fp->f_flags & FP_WAITEVENT)
4981                                         (void)waitevent_close(p, fp);
4982                                 (void) closef_locked(fp, fp->f_fglob, p);
4983                                 fileproc_free(fp);
4984                         }
4985                 }
4986                 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
4987                 fdp->fd_ofiles = NULL;
4988                 fdp->fd_nfiles = 0;
4989         }
4990
4991         proc_fdunlock(p);
4992
4993         if (fdp->fd_cdir)
4994                 vnode_rele(fdp->fd_cdir);
4995         if (fdp->fd_rdir)
4996                 vnode_rele(fdp->fd_rdir);
4997
4998         proc_fdlock_spin(p);
4999         p->p_fd = NULL;
5000         proc_fdunlock(p);
5001
5002         if (fdp->fd_knlist)
5003                 FREE(fdp->fd_knlist, M_KQUEUE);
5004         if (fdp->fd_knhash)
5005                 FREE(fdp->fd_knhash, M_KQUEUE);
5006
5007         FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
5008 }
5009
5010 /*
5011  * closef_locked
5012  *
5013  * Description: Internal form of closef; called with proc_fdlock held
5014  *
5015  * Parameters:  fp                      Pointer to fileproc for fd
5016  *              fg                      Pointer to fileglob for fd
5017  *              p                       Pointer to proc structure
5018  *
5019  * Returns:     0                       Success
5020  *      closef_finish:???               Anything returnable by a per-fileops
5021  *                                      close function
5022  *
5023  * Note:        Decrements reference count on file structure; if this was the
5024  *              last reference, then closef_finish() is called
5025  *
5026  *              p and fp are allowed to  be NULL when closing a file that was
5027  *              being passed in a message (but only if we are called when this
5028  *              is NOT the last reference).
5029  */
5030 int
5031 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
5032 {
5033         struct vnode *vp;
5034         struct flock lf;
5035         struct vfs_context context;
5036         int error;
5037
5038         if (fg == NULL) {
5039                 return (0);
5040         }
5041
5042         /* Set up context with cred stashed in fg */
5043         if (p == current_proc())
5044                 context.vc_thread = current_thread();
5045         else
5046                 context.vc_thread = NULL;
5047         context.vc_ucred = fg->fg_cred;
5048
5049         /*
5050          * POSIX record locking dictates that any close releases ALL
5051          * locks owned by this process.  This is handled by setting
5052          * a flag in the unlock to free ONLY locks obeying POSIX
5053          * semantics, and not to free BSD-style file locks.
5054          * If the descriptor was in a message, POSIX-style locks
5055          * aren't passed with the descriptor.
5056          */
5057         if (p && (p->p_ladvflag & P_LADVLOCK) &&
5058             DTYPE_VNODE == FILEGLOB_DTYPE(fg)) {
5059                 proc_fdunlock(p);
5060
5061                 lf.l_whence = SEEK_SET;
5062                 lf.l_start = 0;
5063                 lf.l_len = 0;
5064                 lf.l_type = F_UNLCK;
5065                 vp = (struct vnode *)fg->fg_data;
5066
5067                 if ( (error = vnode_getwithref(vp)) == 0 ) {
5068                         (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
5069                         (void)vnode_put(vp);
5070                 }
5071                 proc_fdlock(p);
5072         }
5073         lck_mtx_lock_spin(&fg->fg_lock);
5074         fg->fg_count--;
5075
5076         if (fg->fg_count > 0) {
5077                 lck_mtx_unlock(&fg->fg_lock);
5078                 return (0);
5079         }
5080 #if DIAGNOSTIC
5081         if (fg->fg_count != 0)
5082                 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
5083 #endif
5084
5085         if (fp && (fp->f_flags & FP_WRITTEN))
5086                 fg->fg_flag |= FWASWRITTEN;
5087
5088         fg->fg_lflags |= FG_TERM;
5089         lck_mtx_unlock(&fg->fg_lock);
5090
5091         if (p)
5092                 proc_fdunlock(p);
5093
5094         /* Since we ensure that fg->fg_ops is always initialized,
5095          * it is safe to invoke fo_close on the fg */
5096         error = fo_close(fg, &context);
5097
5098         fg_free(fg);
5099
5100         if (p)
5101                 proc_fdlock(p);
5102
5103         return(error);
5104 }
5105
5106
5107 /*
5108  * fileproc_drain
5109  *
5110  * Description: Drain out pending I/O operations
5111  *
5112  * Parameters:  p                               Process closing this file
5113  *              fp                              fileproc struct for the open
5114  *                                              instance on the file
5115  *
5116  * Returns:     void
5117  *
5118  * Locks:       Assumes the caller holds the proc_fdlock
5119  *
5120  * Notes:       For character devices, this occurs on the last close of the
5121  *              device; for all other file descriptors, this occurs on each
5122  *              close to prevent fd's from being closed out from under
5123  *              operations currently in progress and blocked
5124  *
5125  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
5126  *              regarding their use and interaction with this function.
5127  */
5128 void
5129 fileproc_drain(proc_t p, struct fileproc * fp)
5130 {
5131         struct vfs_context context;
5132
5133         context.vc_thread = proc_thread(p);     /* XXX */
5134         context.vc_ucred = fp->f_fglob->fg_cred;
5135
5136         fp->f_iocount-- ; /* (the one the close holds) */
5137
5138         while (fp->f_iocount) {
5139
5140                 lck_mtx_convert_spin(&p->p_fdmlock);
5141
5142                 if (fp->f_fglob->fg_ops->fo_drain) {
5143                         (*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
5144                 }
5145                 if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
5146                         if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64,
5147                                                THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
5148                                 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp);
5149                 }
5150                 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5151                         if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5152                                                THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT)
5153                                 panic("bad select_conflict_queue");
5154                 }
5155                 p->p_fpdrainwait = 1;
5156
5157                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5158
5159         }
5160 #if DIAGNOSTIC
5161         if ((fp->f_flags & FP_INSELECT) != 0)
5162                 panic("FP_INSELECT set on drained fp");
5163 #endif
5164         if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
5165                 fp->f_flags &= ~FP_SELCONFLICT;
5166 }
5167
5168
5169 /*
5170  * fp_free
5171  *
5172  * Description: Release the fd and free the fileproc associated with the fd
5173  *              in the per process open file table of the specified process;
5174  *              these values must correspond.
5175  *
5176  * Parameters:  p                               Process containing fd
5177  *              fd                              fd to be released
5178  *              fp                              fileproc to be freed
5179  *
5180  * Returns:     0                               Success
5181  *
5182  * Notes:       XXX function should be void - no one interprets the returns
5183  *              XXX code
5184  */
5185 int
5186 fp_free(proc_t p, int fd, struct fileproc * fp)
5187 {
5188         proc_fdlock_spin(p);
5189         fdrelse(p, fd);
5190         proc_fdunlock(p);
5191
5192         fg_free(fp->f_fglob);
5193         fileproc_free(fp);
5194         return(0);
5195 }
5196
5197
5198 /*
5199  * flock
5200  *
5201  * Description: Apply an advisory lock on a file descriptor.
5202  *
5203  * Parameters:  p                               Process making request
5204  *              uap->fd                         fd on which the lock is to be
5205  *                                              attempted
5206  *              uap->how                        (Un)Lock bits, including type
5207  *              retval                          Pointer to the call return area
5208  *
5209  * Returns:     0                               Success
5210  *      fp_getfvp:EBADF                         Bad file descriptor
5211  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5212  *      vnode_getwithref:???
5213  *      VNOP_ADVLOCK:???
5214  *
5215  * Implicit returns:
5216  *              *retval (modified)              Size of dtable
5217  *
5218  * Notes:       Just attempt to get a record lock of the requested type on
5219  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5220  */
5221 int
5222 flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5223 {
5224         int fd = uap->fd;
5225         int how = uap->how;
5226         struct fileproc *fp;
5227         struct vnode *vp;
5228         struct flock lf;
5229         vfs_context_t ctx = vfs_context_current();
5230         int error=0;
5231
5232         AUDIT_ARG(fd, uap->fd);
5233         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5234                 return(error);
5235         }
5236         if ( (error = vnode_getwithref(vp)) ) {
5237                 goto out1;
5238         }
5239         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5240
5241         lf.l_whence = SEEK_SET;
5242         lf.l_start = 0;
5243         lf.l_len = 0;
5244         if (how & LOCK_UN) {
5245                 lf.l_type = F_UNLCK;
5246                 fp->f_flag &= ~FHASLOCK;
5247                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5248                 goto out;
5249         }
5250         if (how & LOCK_EX)
5251                 lf.l_type = F_WRLCK;
5252         else if (how & LOCK_SH)
5253                 lf.l_type = F_RDLCK;
5254         else {
5255                 error = EBADF;
5256                 goto out;
5257         }
5258 #if CONFIG_MACF
5259         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
5260         if (error)
5261                 goto out;
5262 #endif
5263         fp->f_flag |= FHASLOCK;
5264         if (how & LOCK_NB) {
5265                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx, NULL);
5266                 goto out;
5267         }
5268         error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx, NULL);
5269 out:
5270         (void)vnode_put(vp);
5271 out1:
5272         fp_drop(p, fd, fp, 0);
5273         return(error);
5274
5275 }
5276
5277 /*
5278  * fileport_makeport
5279  *
5280  * Description: Obtain a Mach send right for a given file descriptor.
5281  *
5282  * Parameters:  p               Process calling fileport
5283  *              uap->fd         The fd to reference
5284  *              uap->portnamep  User address at which to place port name.
5285  *
5286  * Returns:     0               Success.
5287  *              EBADF           Bad file descriptor.
5288  *              EINVAL          File descriptor had type that cannot be sent, misc. other errors.
5289  *              EFAULT          Address at which to store port name is not valid.
5290  *              EAGAIN          Resource shortage.
5291  *
5292  * Implicit returns:
5293  *              On success, name of send right is stored at user-specified address.
5294  */
5295 int
5296 fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5297     __unused int *retval)
5298 {
5299         int err;
5300         int fd = uap->fd;
5301         user_addr_t user_portaddr = uap->portnamep;
5302         struct fileproc *fp = FILEPROC_NULL;
5303         struct fileglob *fg = NULL;
5304         ipc_port_t fileport;
5305         mach_port_name_t name = MACH_PORT_NULL;
5306
5307         proc_fdlock(p);
5308         err = fp_lookup(p, fd, &fp, 1);
5309         if (err != 0) {
5310                 goto out_unlock;
5311         }
5312
5313         if (!file_issendable(p, fp)) {
5314                 err = EINVAL;
5315                 goto out_unlock;
5316         }
5317
5318         if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5319                 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5320                 goto out_unlock;
5321         }
5322
5323         /* Dropped when port is deallocated */
5324         fg = fp->f_fglob;
5325         fg_ref(fp);
5326
5327         proc_fdunlock(p);
5328
5329         /* Allocate and initialize a port */
5330         fileport = fileport_alloc(fg);
5331         if (fileport == IPC_PORT_NULL) {
5332                 err = EAGAIN;
5333                 fg_drop(fp);
5334                 goto out;
5335         }
5336
5337         /* Add an entry.  Deallocates port on failure. */
5338         name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5339         if (!MACH_PORT_VALID(name)) {
5340                 err = EINVAL;
5341                 goto out;
5342         }
5343
5344         err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5345         if (err != 0) {
5346                 goto out;
5347         }
5348
5349         /* Tag the fileglob for debugging purposes */
5350         lck_mtx_lock_spin(&fg->fg_lock);
5351         fg->fg_lflags |= FG_PORTMADE;
5352         lck_mtx_unlock(&fg->fg_lock);
5353
5354         fp_drop(p, fd, fp, 0);
5355
5356         return 0;
5357
5358 out_unlock:
5359         proc_fdunlock(p);
5360 out:
5361         if (MACH_PORT_VALID(name)) {
5362                 /* Don't care if another thread races us to deallocate the entry */
5363                 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5364         }
5365
5366         if (fp != FILEPROC_NULL) {
5367                 fp_drop(p, fd, fp, 0);
5368         }
5369
5370         return err;
5371 }
5372
5373 void
5374 fileport_releasefg(struct fileglob *fg)
5375 {
5376         (void)closef_locked(NULL, fg, PROC_NULL);
5377
5378         return;
5379 }
5380
5381
5382 /*
5383  * fileport_makefd
5384  *
5385  * Description: Obtain the file descriptor for a given Mach send right.
5386  *
5387  * Parameters:  p               Process calling fileport
5388  *              uap->port       Name of send right to file port.
5389  *
5390  * Returns:     0               Success
5391  *              EINVAL          Invalid Mach port name, or port is not for a file.
5392  *      fdalloc:EMFILE
5393  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
5394  *
5395  * Implicit returns:
5396  *              *retval (modified)              The new descriptor
5397  */
5398 int
5399 fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5400 {
5401         struct fileglob *fg;
5402         struct fileproc *fp = FILEPROC_NULL;
5403         ipc_port_t port = IPC_PORT_NULL;
5404         mach_port_name_t send = uap->port;
5405         kern_return_t res;
5406         int fd;
5407         int err;
5408
5409         res = ipc_object_copyin(get_task_ipcspace(p->task),
5410                         send, MACH_MSG_TYPE_COPY_SEND, &port);
5411
5412         if (res != KERN_SUCCESS) {
5413                 err = EINVAL;
5414                 goto out;
5415         }
5416
5417         fg = fileport_port_to_fileglob(port);
5418         if (fg == NULL) {
5419                 err = EINVAL;
5420                 goto out;
5421         }
5422
5423         fp = fileproc_alloc_init(NULL);
5424         if (fp == FILEPROC_NULL) {
5425                 err = ENOMEM;
5426                 goto out;
5427         }
5428
5429         fp->f_fglob = fg;
5430         fg_ref(fp);
5431
5432         proc_fdlock(p);
5433         err = fdalloc(p, 0, &fd);
5434         if (err != 0) {
5435                 proc_fdunlock(p);
5436                 goto out;
5437         }
5438         *fdflags(p, fd) |= UF_EXCLOSE;
5439
5440         procfdtbl_releasefd(p, fd, fp);
5441         proc_fdunlock(p);
5442
5443         *retval = fd;
5444         err = 0;
5445 out:
5446         if ((fp != NULL) && (0 != err)) {
5447                 fileproc_free(fp);
5448         }
5449
5450         if (IPC_PORT_NULL != port) {
5451                 ipc_port_release_send(port);
5452         }
5453
5454         return err;
5455 }
5456
5457
5458 /*
5459  * dupfdopen
5460  *
5461  * Description: Duplicate the specified descriptor to a free descriptor;
5462  *              this is the second half of fdopen(), above.
5463  *
5464  * Parameters:  fdp                             filedesc pointer to fill in
5465  *              indx                            fd to dup to
5466  *              dfd                             fd to dup from
5467  *              mode                            mode to set on new fd
5468  *              error                           command code
5469  *
5470  * Returns:     0                               Success
5471  *              EBADF                           Source fd is bad
5472  *              EACCES                          Requested mode not allowed
5473  *              !0                              'error', if not ENODEV or
5474  *                                              ENXIO
5475  *
5476  * Notes:       XXX This is not thread safe; see fdopen() above
5477  */
5478 int
5479 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5480 {
5481         struct fileproc *wfp;
5482         struct fileproc *fp;
5483 #if CONFIG_MACF
5484         int myerror;
5485 #endif
5486         proc_t p = current_proc();
5487
5488         /*
5489          * If the to-be-dup'd fd number is greater than the allowed number
5490          * of file descriptors, or the fd to be dup'd has already been
5491          * closed, reject.  Note, check for new == old is necessary as
5492          * falloc could allocate an already closed to-be-dup'd descriptor
5493          * as the new descriptor.
5494          */
5495         proc_fdlock(p);
5496
5497         fp = fdp->fd_ofiles[indx];
5498         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5499                         (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5500                         (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5501
5502                 proc_fdunlock(p);
5503                 return (EBADF);
5504         }
5505 #if CONFIG_MACF
5506         myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5507         if (myerror) {
5508                 proc_fdunlock(p);
5509                 return (myerror);
5510         }
5511 #endif
5512         /*
5513          * There are two cases of interest here.
5514          *
5515          * For ENODEV simply dup (dfd) to file descriptor
5516          * (indx) and return.
5517          *
5518          * For ENXIO steal away the file structure from (dfd) and
5519          * store it in (indx).  (dfd) is effectively closed by
5520          * this operation.
5521          *
5522          * Any other error code is just returned.
5523          */
5524         switch (error) {
5525         case ENODEV:
5526                 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5527                         proc_fdunlock(p);
5528                         return (EPERM);
5529                 }
5530
5531                 /*
5532                  * Check that the mode the file is being opened for is a
5533                  * subset of the mode of the existing descriptor.
5534                  */
5535                 if (((flags & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5536                         proc_fdunlock(p);
5537                         return (EACCES);
5538                 }
5539                 if (indx > fdp->fd_lastfile)
5540                         fdp->fd_lastfile = indx;
5541                 (void)fg_ref(wfp);
5542
5543                 if (fp->f_fglob)
5544                         fg_free(fp->f_fglob);
5545                 fp->f_fglob = wfp->f_fglob;
5546
5547                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5548                         (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5549
5550                 proc_fdunlock(p);
5551                 return (0);
5552
5553         default:
5554                 proc_fdunlock(p);
5555                 return (error);
5556         }
5557         /* NOTREACHED */
5558 }
5559
5560
5561 /*
5562  * fg_ref
5563  *
5564  * Description: Add a reference to a fileglob by fileproc
5565  *
5566  * Parameters:  fp                              fileproc containing fileglob
5567  *                                              pointer
5568  *
5569  * Returns:     void
5570  *
5571  * Notes:       XXX Should use OSAddAtomic?
5572  */
5573 void
5574 fg_ref(struct fileproc * fp)
5575 {
5576         struct fileglob *fg;
5577
5578         fg = fp->f_fglob;
5579
5580         lck_mtx_lock_spin(&fg->fg_lock);
5581
5582 #if DIAGNOSTIC
5583         if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
5584                 panic("fg_ref: invalid bits on fp %p", fp);
5585
5586         if (fg->fg_count == 0)
5587                 panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5588                     fp, fg);
5589 #endif
5590         fg->fg_count++;
5591         lck_mtx_unlock(&fg->fg_lock);
5592 }
5593
5594
5595 /*
5596  * fg_drop
5597  *
5598  * Description: Remove a reference to a fileglob by fileproc
5599  *
5600  * Parameters:  fp                              fileproc containing fileglob
5601  *                                              pointer
5602  *
5603  * Returns:     void
5604  *
5605  * Notes:       XXX Should use OSAddAtomic?
5606  */
5607 void
5608 fg_drop(struct fileproc * fp)
5609 {
5610         struct fileglob *fg;
5611
5612         fg = fp->f_fglob;
5613         lck_mtx_lock_spin(&fg->fg_lock);
5614         fg->fg_count--;
5615         lck_mtx_unlock(&fg->fg_lock);
5616 }
5617
5618 #if SOCKETS
5619 /*
5620  * fg_insertuipc_mark
5621  *
5622  * Description: Mark fileglob for insertion onto message queue if needed
5623  *              Also takes fileglob reference
5624  *
5625  * Parameters:  fg      Fileglob pointer to insert
5626  *
5627  * Returns:     true, if the fileglob needs to be inserted onto msg queue
5628  *
5629  * Locks:       Takes and drops fg_lock, potentially many times
5630  */
5631 boolean_t
5632 fg_insertuipc_mark(struct fileglob * fg)
5633 {
5634         boolean_t insert = FALSE;
5635
5636         lck_mtx_lock_spin(&fg->fg_lock);
5637         while (fg->fg_lflags & FG_RMMSGQ) {
5638                 lck_mtx_convert_spin(&fg->fg_lock);
5639
5640                 fg->fg_lflags |= FG_WRMMSGQ;
5641                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
5642         }
5643
5644         fg->fg_count++;
5645         fg->fg_msgcount++;
5646         if (fg->fg_msgcount == 1) {
5647                 fg->fg_lflags |= FG_INSMSGQ;
5648                 insert = TRUE;
5649         }
5650         lck_mtx_unlock(&fg->fg_lock);
5651         return (insert);
5652 }
5653
5654 /*
5655  * fg_insertuipc
5656  *
5657  * Description: Insert marked fileglob onto message queue
5658  *
5659  * Parameters:  fg      Fileglob pointer to insert
5660  *
5661  * Returns:     void
5662  *
5663  * Locks:       Takes and drops fg_lock & uipc_lock
5664  *              DO NOT call this function with proc_fdlock held as unp_gc()
5665  *              can potentially try to acquire proc_fdlock, which can result
5666  *              in a deadlock if this function is in unp_gc_wait().
5667  */
5668 void
5669 fg_insertuipc(struct fileglob * fg)
5670 {
5671         if (fg->fg_lflags & FG_INSMSGQ) {
5672                 lck_mtx_lock_spin(uipc_lock);
5673                 unp_gc_wait();
5674                 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
5675                 lck_mtx_unlock(uipc_lock);
5676                 lck_mtx_lock(&fg->fg_lock);
5677                 fg->fg_lflags &= ~FG_INSMSGQ;
5678                 if (fg->fg_lflags & FG_WINSMSGQ) {
5679                         fg->fg_lflags &= ~FG_WINSMSGQ;
5680                         wakeup(&fg->fg_lflags);
5681                 }
5682                 lck_mtx_unlock(&fg->fg_lock);
5683         }
5684 }
5685
5686 /*
5687  * fg_removeuipc_mark
5688  *
5689  * Description: Mark the fileglob for removal from message queue if needed
5690  *              Also releases fileglob message queue reference
5691  *
5692  * Parameters:  fg      Fileglob pointer to remove
5693  *
5694  * Returns:     true, if the fileglob needs to be removed from msg queue
5695  *
5696  * Locks:       Takes and drops fg_lock, potentially many times
5697  */
5698 boolean_t
5699 fg_removeuipc_mark(struct fileglob * fg)
5700 {
5701         boolean_t remove = FALSE;
5702
5703         lck_mtx_lock_spin(&fg->fg_lock);
5704         while (fg->fg_lflags & FG_INSMSGQ) {
5705                 lck_mtx_convert_spin(&fg->fg_lock);
5706
5707                 fg->fg_lflags |= FG_WINSMSGQ;
5708                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
5709         }
5710         fg->fg_msgcount--;
5711         if (fg->fg_msgcount == 0) {
5712                 fg->fg_lflags |= FG_RMMSGQ;
5713                 remove = TRUE;
5714         }
5715         lck_mtx_unlock(&fg->fg_lock);
5716         return (remove);
5717 }
5718
5719 /*
5720  * fg_removeuipc
5721  *
5722  * Description: Remove marked fileglob from message queue
5723  *
5724  * Parameters:  fg      Fileglob pointer to remove
5725  *
5726  * Returns:     void
5727  *
5728  * Locks:       Takes and drops fg_lock & uipc_lock
5729  *              DO NOT call this function with proc_fdlock held as unp_gc()
5730  *              can potentially try to acquire proc_fdlock, which can result
5731  *              in a deadlock if this function is in unp_gc_wait().
5732  */
5733 void
5734 fg_removeuipc(struct fileglob * fg)
5735 {
5736         if (fg->fg_lflags & FG_RMMSGQ) {
5737                 lck_mtx_lock_spin(uipc_lock);
5738                 unp_gc_wait();
5739                 LIST_REMOVE(fg, f_msglist);
5740                 lck_mtx_unlock(uipc_lock);
5741                 lck_mtx_lock(&fg->fg_lock);
5742                 fg->fg_lflags &= ~FG_RMMSGQ;
5743                 if (fg->fg_lflags & FG_WRMMSGQ) {
5744                         fg->fg_lflags &= ~FG_WRMMSGQ;
5745                         wakeup(&fg->fg_lflags);
5746                 }
5747                 lck_mtx_unlock(&fg->fg_lock);
5748         }
5749 }
5750 #endif /* SOCKETS */
5751
5752 /*
5753  * fo_read
5754  *
5755  * Description: Generic fileops read indirected through the fileops pointer
5756  *              in the fileproc structure
5757  *
5758  * Parameters:  fp                              fileproc structure pointer
5759  *              uio                             user I/O structure pointer
5760  *              flags                           FOF_ flags
5761  *              ctx                             VFS context for operation
5762  *
5763  * Returns:     0                               Success
5764  *              !0                              Errno from read
5765  */
5766 int
5767 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5768 {
5769         return ((*fp->f_ops->fo_read)(fp, uio, flags, ctx));
5770 }
5771
5772
5773 /*
5774  * fo_write
5775  *
5776  * Description: Generic fileops write indirected through the fileops pointer
5777  *              in the fileproc structure
5778  *
5779  * Parameters:  fp                              fileproc structure pointer
5780  *              uio                             user I/O structure pointer
5781  *              flags                           FOF_ flags
5782  *              ctx                             VFS context for operation
5783  *
5784  * Returns:     0                               Success
5785  *              !0                              Errno from write
5786  */
5787 int
5788 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5789 {
5790         return((*fp->f_ops->fo_write)(fp, uio, flags, ctx));
5791 }
5792
5793
5794 /*
5795  * fo_ioctl
5796  *
5797  * Description: Generic fileops ioctl indirected through the fileops pointer
5798  *              in the fileproc structure
5799  *
5800  * Parameters:  fp                              fileproc structure pointer
5801  *              com                             ioctl command
5802  *              data                            pointer to internalized copy
5803  *                                              of user space ioctl command
5804  *                                              parameter data in kernel space
5805  *              ctx                             VFS context for operation
5806  *
5807  * Returns:     0                               Success
5808  *              !0                              Errno from ioctl
5809  *
5810  * Locks:       The caller is assumed to have held the proc_fdlock; this
5811  *              function releases and reacquires this lock.  If the caller
5812  *              accesses data protected by this lock prior to calling this
5813  *              function, it will need to revalidate/reacquire any cached
5814  *              protected data obtained prior to the call.
5815  */
5816 int
5817 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5818 {
5819         int error;
5820
5821         proc_fdunlock(vfs_context_proc(ctx));
5822         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5823         proc_fdlock(vfs_context_proc(ctx));
5824         return(error);
5825 }
5826
5827
5828 /*
5829  * fo_select
5830  *
5831  * Description: Generic fileops select indirected through the fileops pointer
5832  *              in the fileproc structure
5833  *
5834  * Parameters:  fp                              fileproc structure pointer
5835  *              which                           select which
5836  *              wql                             pointer to wait queue list
5837  *              ctx                             VFS context for operation
5838  *
5839  * Returns:     0                               Success
5840  *              !0                              Errno from select
5841  */
5842 int
5843 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5844 {
5845         return((*fp->f_ops->fo_select)(fp, which, wql, ctx));
5846 }
5847
5848
5849 /*
5850  * fo_close
5851  *
5852  * Description: Generic fileops close indirected through the fileops pointer
5853  *              in the fileproc structure
5854  *
5855  * Parameters:  fp                              fileproc structure pointer for
5856  *                                              file to close
5857  *              ctx                             VFS context for operation
5858  *
5859  * Returns:     0                               Success
5860  *              !0                              Errno from close
5861  */
5862 int
5863 fo_close(struct fileglob *fg, vfs_context_t ctx)
5864 {
5865         return((*fg->fg_ops->fo_close)(fg, ctx));
5866 }
5867
5868
5869 /*
5870  * fo_kqfilter
5871  *
5872  * Description: Generic fileops kqueue filter indirected through the fileops
5873  *              pointer in the fileproc structure
5874  *
5875  * Parameters:  fp                              fileproc structure pointer
5876  *              kn                              pointer to knote to filter on
5877  *              ctx                             VFS context for operation
5878  *
5879  * Returns:     0                               Success
5880  *              !0                              Errno from kqueue filter
5881  */
5882 int
5883 fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
5884 {
5885         return ((*fp->f_ops->fo_kqfilter)(fp, kn, ctx));
5886 }
5887
5888 /*
5889  * The ability to send a file descriptor to another
5890  * process is opt-in by file type.
5891  */
5892 boolean_t
5893 file_issendable(proc_t p, struct fileproc *fp)
5894 {
5895         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
5896
5897         switch (fp->f_type) {
5898         case DTYPE_VNODE:
5899         case DTYPE_SOCKET:
5900         case DTYPE_PIPE:
5901         case DTYPE_PSXSHM:
5902                 return (0 == (fp->f_fglob->fg_lflags & FG_CONFINED));
5903         default:
5904                 /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
5905                 return FALSE;
5906         }
5907 }
5908
5909
5910 struct fileproc *
5911 fileproc_alloc_init(__unused void *arg)
5912 {
5913         struct fileproc *fp;
5914
5915         MALLOC_ZONE(fp, struct fileproc *, sizeof (*fp), M_FILEPROC, M_WAITOK);
5916         if (fp)
5917                 bzero(fp, sizeof (*fp));
5918
5919         return (fp);
5920 }
5921
5922 void
5923 fileproc_free(struct fileproc *fp)
5924 {
5925         switch (FILEPROC_TYPE(fp)) {
5926         case FTYPE_SIMPLE:
5927                 FREE_ZONE(fp, sizeof (*fp), M_FILEPROC);
5928                 break;
5929         case FTYPE_GUARDED:
5930                 guarded_fileproc_free(fp);
5931                 break;
5932         default:
5933                 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags);
5934         }
5935 }