bsd/kern/kern_descrip.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*
  30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_descrip.c      8.8 (Berkeley) 2/14/95
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/vnode_internal.h>
  80 #include <sys/proc_internal.h>
  81 #include <sys/kauth.h>
  82 #include <sys/file_internal.h>
  83 #include <sys/guarded.h>
  84 #include <sys/priv.h>
  85 #include <sys/socket.h>
  86 #include <sys/socketvar.h>
  87 #include <sys/stat.h>
  88 #include <sys/ioctl.h>
  89 #include <sys/fcntl.h>
  90 #include <sys/fsctl.h>
  91 #include <sys/malloc.h>
  92 #include <sys/mman.h>
  93 #include <sys/syslog.h>
  94 #include <sys/unistd.h>
  95 #include <sys/resourcevar.h>
  96 #include <sys/aio_kern.h>
  97 #include <sys/ev.h>
  98 #include <kern/locks.h>
  99 #include <sys/uio_internal.h>
 100 #include <sys/codesign.h>
 101 #include <sys/codedir_internal.h>
 102
 103 #include <security/audit/audit.h>
 104
 105 #include <sys/mount_internal.h>
 106 #include <sys/kdebug.h>
 107 #include <sys/sysproto.h>
 108 #include <sys/pipe.h>
 109 #include <sys/spawn.h>
 110 #include <sys/cprotect.h>
 111 #include <kern/kern_types.h>
 112 #include <kern/kalloc.h>
 113 #include <kern/waitq.h>
 114 #include <libkern/OSAtomic.h>
 115
 116 #include <sys/ubc_internal.h>
 117
 118 #include <kern/ipc_misc.h>
 119 #include <vm/vm_protos.h>
 120
 121 #include <mach/mach_port.h>
 122 #include <stdbool.h>
 123
 124 #if CONFIG_MACF
 125 #include <security/mac_framework.h>
 126 #endif
 127
 128 #define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
 129 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
 130     mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
 131 void ipc_port_release_send(ipc_port_t);
 132
 133 struct psemnode;
 134 struct pshmnode;
 135
 136 static int finishdup(proc_t p,
 137     struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
 138
 139 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
 140 void fg_drop(struct fileproc * fp);
 141 void fg_free(struct fileglob *fg);
 142 void fg_ref(struct fileproc * fp);
 143 void fileport_releasefg(struct fileglob *fg);
 144
 145 /* flags for close_internal_locked */
 146 #define FD_DUP2RESV 1
 147
 148 /* We don't want these exported */
 149
 150 __private_extern__
 151 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
 152
 153 static void _fdrelse(struct proc * p, int fd);
 154
 155
 156 extern void file_lock_init(void);
 157
 158 extern kauth_scope_t    kauth_scope_fileop;
 159
 160 /* Conflict wait queue for when selects collide (opaque type) */
 161 extern struct waitq select_conflict_queue;
 162
 163 #ifndef HFS_GET_BOOT_INFO
 164 #define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
 165 #endif
 166
 167 #ifndef HFS_SET_BOOT_INFO
 168 #define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
 169 #endif
 170
 171 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
 172 #define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t)
 173 #endif
 174
/*
 * Shorthand accessors: each f_xxx name expands to the corresponding field
 * reached through a fileproc's f_fglob pointer, so "fp->f_flag" reads
 * "fp->f_fglob->fg_flag", and so on.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data
/*
 * Evaluates to 1 when adding x and y would leave the [LLONG_MIN, LLONG_MAX]
 * range, and to 0 otherwise.  Only same-sign operand pairs can overflow, so
 * only those are tested.  Both arguments are evaluated more than once; do
 * not pass expressions with side effects.
 */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
		(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
		(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
		? 1 : 0)
 186 /*
 187  * Descriptor management.
 188  */
struct fmsglist fmsghead;       /* head of list of open files */
/* NOTE(review): the comment below duplicates fmsghead's; confirm what this list actually holds. */
struct fmsglist fmsg_ithead;    /* head of list of open files */
int nfiles;                     /* actual number of open files */


/* Lock group attribute, lock group and lock attribute; all three are allocated in file_lock_init(). */
lck_grp_attr_t * file_lck_grp_attr;
lck_grp_t * file_lck_grp;
lck_attr_t * file_lck_attr;

/* Mutex allocated from file_lck_grp/file_lck_attr in file_lock_init(). */
lck_mtx_t * uipc_lock;
 199
 200
 201 /*
 202  * check_file_seek_range
 203  *
 204  * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
 205  *
 206  * Parameters:  fl              Flock structure.
 207  *              cur_file_offset Current offset in the file.
 208  *
 209  * Returns:     0               on Success.
 210  *              EOVERFLOW       on overflow.
 211  *              EINVAL          on offset less than zero.
 212  */
 213
 214 static int
 215 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
 216 {
 217         if (fl->l_whence == SEEK_CUR) {
 218                 /* Check if the start marker is beyond LLONG_MAX. */
 219                 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
 220                         /* Check if start marker is negative */
 221                         if (fl->l_start < 0) {
 222                                 return EINVAL;
 223                         }
 224                         return EOVERFLOW;
 225                 }
 226                 /* Check if the start marker is negative. */
 227                 if (fl->l_start + cur_file_offset < 0) {
 228                         return EINVAL;
 229                 }
 230                 /* Check if end marker is beyond LLONG_MAX. */
 231                 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
 232                     cur_file_offset, fl->l_len - 1))) {
 233                         return EOVERFLOW;
 234                 }
 235                 /* Check if the end marker is negative. */
 236                 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
 237                     fl->l_len < 0)) {
 238                         return EINVAL;
 239                 }
 240         } else if (fl->l_whence == SEEK_SET) {
 241                 /* Check if the start marker is negative. */
 242                 if (fl->l_start < 0) {
 243                         return EINVAL;
 244                 }
 245                 /* Check if the end marker is beyond LLONG_MAX. */
 246                 if ((fl->l_len > 0) &&
 247                     CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
 248                         return EOVERFLOW;
 249                 }
 250                 /* Check if the end marker is negative. */
 251                 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
 252                         return EINVAL;
 253                 }
 254         }
 255         return 0;
 256 }
 257
 258
 259 /*
 260  * file_lock_init
 261  *
 262  * Description: Initialize the file lock group and the uipc and flist locks
 263  *
 264  * Parameters:  (void)
 265  *
 266  * Returns:     void
 267  *
 268  * Notes:       Called at system startup from bsd_init().
 269  */
 270 void
 271 file_lock_init(void)
 272 {
 273         /* allocate file lock group attribute and group */
 274         file_lck_grp_attr = lck_grp_attr_alloc_init();
 275
 276         file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
 277
 278         /* Allocate file lock attribute */
 279         file_lck_attr = lck_attr_alloc_init();
 280
 281         uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr);
 282 }
 283
 284
 285 void
 286 proc_dirs_lock_shared(proc_t p)
 287 {
 288         lck_rw_lock_shared(&p->p_dirs_lock);
 289 }
 290
 291 void
 292 proc_dirs_unlock_shared(proc_t p)
 293 {
 294         lck_rw_unlock_shared(&p->p_dirs_lock);
 295 }
 296
 297 void
 298 proc_dirs_lock_exclusive(proc_t p)
 299 {
 300         lck_rw_lock_exclusive(&p->p_dirs_lock);
 301 }
 302
 303 void
 304 proc_dirs_unlock_exclusive(proc_t p)
 305 {
 306         lck_rw_unlock_exclusive(&p->p_dirs_lock);
 307 }
 308
 309 /*
 310  * proc_fdlock, proc_fdlock_spin
 311  *
 312  * Description: Lock to control access to the per process struct fileproc
 313  *              and struct filedesc
 314  *
 315  * Parameters:  p                               Process to take the lock on
 316  *
 317  * Returns:     void
 318  *
 319  * Notes:       The lock is initialized in forkproc() and destroyed in
 320  *              reap_child_process().
 321  */
 322 void
 323 proc_fdlock(proc_t p)
 324 {
 325         lck_mtx_lock(&p->p_fdmlock);
 326 }
 327
 328 void
 329 proc_fdlock_spin(proc_t p)
 330 {
 331         lck_mtx_lock_spin(&p->p_fdmlock);
 332 }
 333
 334 void
 335 proc_fdlock_assert(proc_t p, int assertflags)
 336 {
 337         lck_mtx_assert(&p->p_fdmlock, assertflags);
 338 }
 339
 340
 341 /*
 342  * proc_fdunlock
 343  *
 344  * Description: Unlock the lock previously locked by a call to proc_fdlock()
 345  *
 346  * Parameters:  p                               Process to drop the lock on
 347  *
 348  * Returns:     void
 349  */
 350 void
 351 proc_fdunlock(proc_t p)
 352 {
 353         lck_mtx_unlock(&p->p_fdmlock);
 354 }
 355
 356
 357 /*
 358  * System calls on descriptors.
 359  */
 360
 361
 362 /*
 363  * getdtablesize
 364  *
 365  * Description: Returns the per process maximum size of the descriptor table
 366  *
 367  * Parameters:  p                               Process being queried
 368  *              retval                          Pointer to the call return area
 369  *
 370  * Returns:     0                               Success
 371  *
 372  * Implicit returns:
 373  *              *retval (modified)              Size of dtable
 374  */
 375 int
 376 getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
 377 {
 378         proc_fdlock_spin(p);
 379         *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
 380         proc_fdunlock(p);
 381
 382         return 0;
 383 }
 384
 385
 386 void
 387 procfdtbl_reservefd(struct proc * p, int fd)
 388 {
 389         p->p_fd->fd_ofiles[fd] = NULL;
 390         p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
 391 }
 392
 393 void
 394 procfdtbl_markclosefd(struct proc * p, int fd)
 395 {
 396         p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
 397 }
 398
 399 void
 400 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
 401 {
 402         if (fp != NULL) {
 403                 p->p_fd->fd_ofiles[fd] = fp;
 404         }
 405         p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
 406         if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
 407                 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
 408                 wakeup(&p->p_fd);
 409         }
 410 }
 411
 412 void
 413 procfdtbl_waitfd(struct proc * p, int fd)
 414 {
 415         p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
 416         msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
 417 }
 418
 419
 420 void
 421 procfdtbl_clearfd(struct proc * p, int fd)
 422 {
 423         int waiting;
 424
 425         waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
 426         p->p_fd->fd_ofiles[fd] = NULL;
 427         p->p_fd->fd_ofileflags[fd] = 0;
 428         if (waiting == UF_RESVWAIT) {
 429                 wakeup(&p->p_fd);
 430         }
 431 }
 432
 433 /*
 434  * _fdrelse
 435  *
 436  * Description: Inline utility function to free an fd in a filedesc
 437  *
 438  * Parameters:  fdp                             Pointer to filedesc fd lies in
 439  *              fd                              fd to free
 440  *              reserv                          fd should be reserved
 441  *
 442  * Returns:     void
 443  *
 444  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
 445  *              the caller
 446  */
 447 static void
 448 _fdrelse(struct proc * p, int fd)
 449 {
 450         struct filedesc *fdp = p->p_fd;
 451         int nfd = 0;
 452
 453         if (fd < fdp->fd_freefile) {
 454                 fdp->fd_freefile = fd;
 455         }
 456 #if DIAGNOSTIC
 457         if (fd > fdp->fd_lastfile) {
 458                 panic("fdrelse: fd_lastfile inconsistent");
 459         }
 460 #endif
 461         procfdtbl_clearfd(p, fd);
 462
 463         while ((nfd = fdp->fd_lastfile) > 0 &&
 464             fdp->fd_ofiles[nfd] == NULL &&
 465             !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
 466                 /* JMM - What about files with lingering EV_VANISHED knotes? */
 467                 fdp->fd_lastfile--;
 468         }
 469 }
 470
 471
 472 int
 473 fd_rdwr(
 474         int fd,
 475         enum uio_rw rw,
 476         uint64_t base,
 477         int64_t len,
 478         enum uio_seg segflg,
 479         off_t   offset,
 480         int     io_flg,
 481         int64_t *aresid)
 482 {
 483         struct fileproc *fp;
 484         proc_t  p;
 485         int error = 0;
 486         int flags = 0;
 487         int spacetype;
 488         uio_t auio = NULL;
 489         char uio_buf[UIO_SIZEOF(1)];
 490         struct vfs_context context = *(vfs_context_current());
 491         bool wrote_some = false;
 492
 493         p = current_proc();
 494
 495         error = fp_lookup(p, fd, &fp, 0);
 496         if (error) {
 497                 return error;
 498         }
 499
 500         if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) {
 501                 error = EINVAL;
 502                 goto out;
 503         }
 504         if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
 505                 error = EBADF;
 506                 goto out;
 507         }
 508
 509         if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
 510                 error = EBADF;
 511                 goto out;
 512         }
 513
 514         context.vc_ucred = fp->f_fglob->fg_cred;
 515
 516         if (UIO_SEG_IS_USER_SPACE(segflg)) {
 517                 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
 518         } else {
 519                 spacetype = UIO_SYSSPACE;
 520         }
 521
 522         auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
 523
 524         uio_addiov(auio, base, len);
 525
 526         if (!(io_flg & IO_APPEND)) {
 527                 flags = FOF_OFFSET;
 528         }
 529
 530         if (rw == UIO_WRITE) {
 531                 user_ssize_t orig_resid = uio_resid(auio);
 532                 error = fo_write(fp, auio, flags, &context);
 533                 wrote_some = uio_resid(auio) < orig_resid;
 534         } else {
 535                 error = fo_read(fp, auio, flags, &context);
 536         }
 537
 538         if (aresid) {
 539                 *aresid = uio_resid(auio);
 540         } else {
 541                 if (uio_resid(auio) && error == 0) {
 542                         error = EIO;
 543                 }
 544         }
 545 out:
 546         if (wrote_some) {
 547                 fp_drop_written(p, fd, fp);
 548         } else {
 549                 fp_drop(p, fd, fp, 0);
 550         }
 551
 552         return error;
 553 }
 554
 555
 556
 557 /*
 558  * dup
 559  *
 560  * Description: Duplicate a file descriptor.
 561  *
 562  * Parameters:  p                               Process performing the dup
 563  *              uap->fd                         The fd to dup
 564  *              retval                          Pointer to the call return area
 565  *
 566  * Returns:     0                               Success
 567  *              !0                              Errno
 568  *
 569  * Implicit returns:
 570  *              *retval (modified)              The new descriptor
 571  */
 572 int
 573 dup(proc_t p, struct dup_args *uap, int32_t *retval)
 574 {
 575         struct filedesc *fdp = p->p_fd;
 576         int old = uap->fd;
 577         int new, error;
 578         struct fileproc *fp;
 579
 580         proc_fdlock(p);
 581         if ((error = fp_lookup(p, old, &fp, 1))) {
 582                 proc_fdunlock(p);
 583                 return error;
 584         }
 585         if (FP_ISGUARDED(fp, GUARD_DUP)) {
 586                 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
 587                 (void) fp_drop(p, old, fp, 1);
 588                 proc_fdunlock(p);
 589                 return error;
 590         }
 591         if ((error = fdalloc(p, 0, &new))) {
 592                 fp_drop(p, old, fp, 1);
 593                 proc_fdunlock(p);
 594                 return error;
 595         }
 596         error = finishdup(p, fdp, old, new, 0, retval);
 597         fp_drop(p, old, fp, 1);
 598         proc_fdunlock(p);
 599
 600         if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
 601                 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
 602                     new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
 603         }
 604
 605         return error;
 606 }
 607
 608 /*
 609  * dup2
 610  *
 611  * Description: Duplicate a file descriptor to a particular value.
 612  *
 613  * Parameters:  p                               Process performing the dup
 614  *              uap->from                       The fd to dup
 615  *              uap->to                         The fd to dup it to
 616  *              retval                          Pointer to the call return area
 617  *
 618  * Returns:     0                               Success
 619  *              !0                              Errno
 620  *
 621  * Implicit returns:
 622  *              *retval (modified)              The new descriptor
 623  */
int
dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int old = uap->from, new = uap->to;
	int i, error;
	struct fileproc *fp, *nfp;

	proc_fdlock(p);

	/*
	 * Every pass re-takes the reference on 'old'.  Each path that jumps
	 * back here has dropped that reference first.
	 */
startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* A source descriptor guarded against dup raises a guard exception. */
	if (FP_ISGUARDED(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	/* The target must be non-negative and below both the soft limit and maxfiles. */
	if (new < 0 ||
	    (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfiles) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* Duplicating a descriptor onto itself succeeds without changing the table. */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	/*
	 * Target lies outside the current table.  (new < 0 was already
	 * rejected above, so only the fd_nfiles comparison can be true.)
	 */
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* NOTE(review): fdalloc presumably grows the table and returns a reserved slot >= new; confirm. */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/* Got a different slot: give it back and handle 'new' like an in-table target. */
			fdrelse(p, i);
			goto closeit;
		}
	} else {
closeit:
		/*
		 * Slot 'new' is reserved by someone else: drop our reference
		 * on 'old', sleep until woken, and redo every check.
		 */
		while ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		/* Slot 'new' holds an open file: close it, then start over. */
		if ((fdp->fd_ofiles[new] != NULL) &&
		    ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
			fp_drop(p, old, fp, 1);
			/* A target guarded against close raises a guard exception instead. */
			if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				(void) fp_drop(p, new, nfp, 1);
				proc_fdunlock(p);
				return error;
			}
			(void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			/* Wipe the slot (and wake waiters) before re-validating from the top. */
			procfdtbl_clearfd(p, new);
			goto startover;
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			/* Reserve 'new' for the finishdup() call below. */
			procfdtbl_reservefd(p, new);
		}

#if DIAGNOSTIC
		proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	}
#if DIAGNOSTIC
	/* At this point the slot must be empty and reserved. */
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
 722
 723
 724 /*
 725  * fcntl
 726  *
 727  * Description: The file control system call.
 728  *
 729  * Parameters:  p                               Process performing the fcntl
 730  *              uap->fd                         The fd to operate against
 731  *              uap->cmd                        The command to perform
 732  *              uap->arg                        Pointer to the command argument
 733  *              retval                          Pointer to the call return area
 734  *
 735  * Returns:     0                               Success
 736  *              !0                              Errno (see fcntl_nocancel)
 737  *
 738  * Implicit returns:
 739  *              *retval (modified)              fcntl return value (if any)
 740  *
 741  * Notes:       This system call differs from fcntl_nocancel() in that it
 742  *              tests for cancellation prior to performing a potentially
 743  *              blocking operation.
 744  */
 745 int
 746 fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
 747 {
 748         __pthread_testcancel(1);
 749         return fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
 750 }
 751
 752
 753 /*
 754  * fcntl_nocancel
 755  *
 756  * Description: A non-cancel-testing file control system call.
 757  *
 758  * Parameters:  p                               Process performing the fcntl
 759  *              uap->fd                         The fd to operate against
 760  *              uap->cmd                        The command to perform
 761  *              uap->arg                        Pointer to the command argument
 762  *              retval                          Pointer to the call return area
 763  *
 764  * Returns:     0                               Success
 765  *              EINVAL
 766  *      fp_lookup:EBADF                         Bad file descriptor
 767  * [F_DUPFD]
 768  *      fdalloc:EMFILE
 769  *      fdalloc:ENOMEM
 770  *      finishdup:EBADF
 771  *      finishdup:ENOMEM
 772  * [F_SETOWN]
 773  *              ESRCH
 774  * [F_SETLK]
 775  *              EBADF
 776  *              EOVERFLOW
 777  *      copyin:EFAULT
 778  *      vnode_getwithref:???
 779  *      VNOP_ADVLOCK:???
 780  *      msleep:ETIMEDOUT
 781  * [F_GETLK]
 782  *              EBADF
 783  *              EOVERFLOW
 784  *      copyin:EFAULT
 785  *      copyout:EFAULT
 786  *      vnode_getwithref:???
 787  *      VNOP_ADVLOCK:???
 788  * [F_PREALLOCATE]
 789  *              EBADF
 790  *              EINVAL
 791  *      copyin:EFAULT
 792  *      copyout:EFAULT
 793  *      vnode_getwithref:???
 794  *      VNOP_ALLOCATE:???
 795  * [F_SETSIZE,F_RDADVISE]
 796  *              EBADF
 797  *      copyin:EFAULT
 798  *      vnode_getwithref:???
 799  * [F_RDAHEAD,F_NOCACHE]
 800  *              EBADF
 801  *      vnode_getwithref:???
 802  * [???]
 803  *
 804  * Implicit returns:
 805  *              *retval (modified)              fcntl return value (if any)
 806  */
 807 int
 808 fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 809 {
 810         int fd = uap->fd;
 811         struct filedesc *fdp = p->p_fd;
 812         struct fileproc *fp;
 813         char *pop;
 814         struct vnode *vp = NULLVP;      /* for AUDIT_ARG() at end */
 815         int i, tmp, error, error2, flg = 0;
 816         struct flock fl = {};
 817         struct flocktimeout fltimeout;
 818         struct timespec *timeout = NULL;
 819         struct vfs_context context;
 820         off_t offset;
 821         int newmin;
 822         daddr64_t lbn, bn;
 823         unsigned int fflag;
 824         user_addr_t argp;
 825         boolean_t is64bit;
 826
 827         AUDIT_ARG(fd, uap->fd);
 828         AUDIT_ARG(cmd, uap->cmd);
 829
 830         proc_fdlock(p);
 831         if ((error = fp_lookup(p, fd, &fp, 1))) {
 832                 proc_fdunlock(p);
 833                 return error;
 834         }
 835         context.vc_thread = current_thread();
 836         context.vc_ucred = fp->f_cred;
 837
 838         is64bit = proc_is64bit(p);
 839         if (is64bit) {
 840                 argp = uap->arg;
 841         } else {
 842                 /*
 843                  * Since the arg parameter is defined as a long but may be
 844                  * either a long or a pointer we must take care to handle
 845                  * sign extension issues.  Our sys call munger will sign
 846                  * extend a long when we are called from a 32-bit process.
 847                  * Since we can never have an address greater than 32-bits
 848                  * from a 32-bit process we lop off the top 32-bits to avoid
 849                  * getting the wrong address
 850                  */
 851                 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
 852         }
 853
 854         pop = &fdp->fd_ofileflags[fd];
 855
 856 #if CONFIG_MACF
 857         error = mac_file_check_fcntl(proc_ucred(p), fp->f_fglob, uap->cmd,
 858             uap->arg);
 859         if (error) {
 860                 goto out;
 861         }
 862 #endif
 863
 864         switch (uap->cmd) {
 865         case F_DUPFD:
 866         case F_DUPFD_CLOEXEC:
 867                 if (FP_ISGUARDED(fp, GUARD_DUP)) {
 868                         error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
 869                         goto out;
 870                 }
 871                 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 872                 AUDIT_ARG(value32, newmin);
 873                 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
 874                     newmin >= maxfiles) {
 875                         error = EINVAL;
 876                         goto out;
 877                 }
 878                 if ((error = fdalloc(p, newmin, &i))) {
 879                         goto out;
 880                 }
 881                 error = finishdup(p, fdp, fd, i,
 882                     uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
 883                 goto out;
 884
 885         case F_GETFD:
 886                 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
 887                 error = 0;
 888                 goto out;
 889
 890         case F_SETFD:
 891                 AUDIT_ARG(value32, uap->arg);
 892                 if (uap->arg & FD_CLOEXEC) {
 893                         *pop |= UF_EXCLOSE;
 894                 } else {
 895                         if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
 896                                 error = fp_guard_exception(p,
 897                                     fd, fp, kGUARD_EXC_NOCLOEXEC);
 898                                 goto out;
 899                         }
 900                         *pop &= ~UF_EXCLOSE;
 901                 }
 902                 error = 0;
 903                 goto out;
 904
 905         case F_GETFL:
 906                 *retval = OFLAGS(fp->f_flag);
 907                 error = 0;
 908                 goto out;
 909
 910         case F_SETFL:
 911                 fp->f_flag &= ~FCNTLFLAGS;
 912                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 913                 AUDIT_ARG(value32, tmp);
 914                 fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS;
 915                 tmp = fp->f_flag & FNONBLOCK;
 916                 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 917                 if (error) {
 918                         goto out;
 919                 }
 920                 tmp = fp->f_flag & FASYNC;
 921                 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
 922                 if (!error) {
 923                         goto out;
 924                 }
 925                 fp->f_flag &= ~FNONBLOCK;
 926                 tmp = 0;
 927                 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
 928                 goto out;
 929
 930         case F_GETOWN:
 931                 if (fp->f_type == DTYPE_SOCKET) {
 932                         *retval = ((struct socket *)fp->f_data)->so_pgid;
 933                         error = 0;
 934                         goto out;
 935                 }
 936                 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
 937                 *retval = -*retval;
 938                 goto out;
 939
 940         case F_SETOWN:
 941                 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
 942                 AUDIT_ARG(value32, tmp);
 943                 if (fp->f_type == DTYPE_SOCKET) {
 944                         ((struct socket *)fp->f_data)->so_pgid = tmp;
 945                         error = 0;
 946                         goto out;
 947                 }
 948                 if (fp->f_type == DTYPE_PIPE) {
 949                         error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 950                         goto out;
 951                 }
 952
 953                 if (tmp <= 0) {
 954                         tmp = -tmp;
 955                 } else {
 956                         proc_t p1 = proc_find(tmp);
 957                         if (p1 == 0) {
 958                                 error = ESRCH;
 959                                 goto out;
 960                         }
 961                         tmp = (int)p1->p_pgrpid;
 962                         proc_rele(p1);
 963                 }
 964                 error =  fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
 965                 goto out;
 966
 967         case F_SETNOSIGPIPE:
 968                 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
 969                 if (fp->f_type == DTYPE_SOCKET) {
 970 #if SOCKETS
 971                         error = sock_setsockopt((struct socket *)fp->f_data,
 972                             SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
 973 #else
 974                         error = EINVAL;
 975 #endif
 976                 } else {
 977                         struct fileglob *fg = fp->f_fglob;
 978
 979                         lck_mtx_lock_spin(&fg->fg_lock);
 980                         if (tmp) {
 981                                 fg->fg_lflags |= FG_NOSIGPIPE;
 982                         } else {
 983                                 fg->fg_lflags &= ~FG_NOSIGPIPE;
 984                         }
 985                         lck_mtx_unlock(&fg->fg_lock);
 986                         error = 0;
 987                 }
 988                 goto out;
 989
 990         case F_GETNOSIGPIPE:
 991                 if (fp->f_type == DTYPE_SOCKET) {
 992 #if SOCKETS
 993                         int retsize = sizeof(*retval);
 994                         error = sock_getsockopt((struct socket *)fp->f_data,
 995                             SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
 996 #else
 997                         error = EINVAL;
 998 #endif
 999                 } else {
1000                         *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
1001                             1 : 0;
1002                         error = 0;
1003                 }
1004                 goto out;
1005
1006         case F_SETCONFINED:
1007                 /*
1008                  * If this is the only reference to this fglob in the process
1009                  * and it's already marked as close-on-fork then mark it as
1010                  * (immutably) "confined" i.e. any fd that points to it will
1011                  * forever be close-on-fork, and attempts to use an IPC
1012                  * mechanism to move the descriptor elsewhere will fail.
1013                  */
1014                 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1015                         struct fileglob *fg = fp->f_fglob;
1016
1017                         lck_mtx_lock_spin(&fg->fg_lock);
1018                         if (fg->fg_lflags & FG_CONFINED) {
1019                                 error = 0;
1020                         } else if (1 != fg->fg_count) {
1021                                 error = EAGAIN; /* go close the dup .. */
1022                         } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1023                                 fg->fg_lflags |= FG_CONFINED;
1024                                 error = 0;
1025                         } else {
1026                                 error = EBADF;  /* open without O_CLOFORK? */
1027                         }
1028                         lck_mtx_unlock(&fg->fg_lock);
1029                 } else {
1030                         /*
1031                          * Other subsystems may have built on the immutability
1032                          * of FG_CONFINED; clearing it may be tricky.
1033                          */
1034                         error = EPERM;          /* immutable */
1035                 }
1036                 goto out;
1037
1038         case F_GETCONFINED:
1039                 *retval = (fp->f_fglob->fg_lflags & FG_CONFINED) ? 1 : 0;
1040                 error = 0;
1041                 goto out;
1042
1043         case F_SETLKWTIMEOUT:
1044         case F_SETLKW:
1045         case F_OFD_SETLKWTIMEOUT:
1046         case F_OFD_SETLKW:
1047                 flg |= F_WAIT;
1048         /* Fall into F_SETLK */
1049
1050         case F_SETLK:
1051         case F_OFD_SETLK:
1052                 if (fp->f_type != DTYPE_VNODE) {
1053                         error = EBADF;
1054                         goto out;
1055                 }
1056                 vp = (struct vnode *)fp->f_data;
1057
1058                 fflag = fp->f_flag;
1059                 offset = fp->f_offset;
1060                 proc_fdunlock(p);
1061
1062                 /* Copy in the lock structure */
1063                 if (F_SETLKWTIMEOUT == uap->cmd ||
1064                     F_OFD_SETLKWTIMEOUT == uap->cmd) {
1065                         error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1066                         if (error) {
1067                                 goto outdrop;
1068                         }
1069                         fl = fltimeout.fl;
1070                         timeout = &fltimeout.timeout;
1071                 } else {
1072                         error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1073                         if (error) {
1074                                 goto outdrop;
1075                         }
1076                 }
1077
1078                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1079                 /* and ending byte for EOVERFLOW in SEEK_SET */
1080                 error = check_file_seek_range(&fl, offset);
1081                 if (error) {
1082                         goto outdrop;
1083                 }
1084
1085                 if ((error = vnode_getwithref(vp))) {
1086                         goto outdrop;
1087                 }
1088                 if (fl.l_whence == SEEK_CUR) {
1089                         fl.l_start += offset;
1090                 }
1091
1092 #if CONFIG_MACF
1093                 error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1094                     F_SETLK, &fl);
1095                 if (error) {
1096                         (void)vnode_put(vp);
1097                         goto outdrop;
1098                 }
1099 #endif
1100                 switch (uap->cmd) {
1101                 case F_OFD_SETLK:
1102                 case F_OFD_SETLKW:
1103                 case F_OFD_SETLKWTIMEOUT:
1104                         flg |= F_OFD_LOCK;
1105                         switch (fl.l_type) {
1106                         case F_RDLCK:
1107                                 if ((fflag & FREAD) == 0) {
1108                                         error = EBADF;
1109                                         break;
1110                                 }
1111                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1112                                     F_SETLK, &fl, flg, &context, timeout);
1113                                 break;
1114                         case F_WRLCK:
1115                                 if ((fflag & FWRITE) == 0) {
1116                                         error = EBADF;
1117                                         break;
1118                                 }
1119                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1120                                     F_SETLK, &fl, flg, &context, timeout);
1121                                 break;
1122                         case F_UNLCK:
1123                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1124                                     F_UNLCK, &fl, F_OFD_LOCK, &context,
1125                                     timeout);
1126                                 break;
1127                         default:
1128                                 error = EINVAL;
1129                                 break;
1130                         }
1131                         if (0 == error &&
1132                             (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1133                                 struct fileglob *fg = fp->f_fglob;
1134
1135                                 /*
1136                                  * arrange F_UNLCK on last close (once
1137                                  * set, FG_HAS_OFDLOCK is immutable)
1138                                  */
1139                                 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1140                                         lck_mtx_lock_spin(&fg->fg_lock);
1141                                         fg->fg_lflags |= FG_HAS_OFDLOCK;
1142                                         lck_mtx_unlock(&fg->fg_lock);
1143                                 }
1144                         }
1145                         break;
1146                 default:
1147                         flg |= F_POSIX;
1148                         switch (fl.l_type) {
1149                         case F_RDLCK:
1150                                 if ((fflag & FREAD) == 0) {
1151                                         error = EBADF;
1152                                         break;
1153                                 }
1154                                 // XXX UInt32 unsafe for LP64 kernel
1155                                 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1156                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1157                                     F_SETLK, &fl, flg, &context, timeout);
1158                                 break;
1159                         case F_WRLCK:
1160                                 if ((fflag & FWRITE) == 0) {
1161                                         error = EBADF;
1162                                         break;
1163                                 }
1164                                 // XXX UInt32 unsafe for LP64 kernel
1165                                 OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag);
1166                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1167                                     F_SETLK, &fl, flg, &context, timeout);
1168                                 break;
1169                         case F_UNLCK:
1170                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1171                                     F_UNLCK, &fl, F_POSIX, &context, timeout);
1172                                 break;
1173                         default:
1174                                 error = EINVAL;
1175                                 break;
1176                         }
1177                         break;
1178                 }
1179                 (void) vnode_put(vp);
1180                 goto outdrop;
1181
1182         case F_GETLK:
1183         case F_OFD_GETLK:
1184         case F_GETLKPID:
1185         case F_OFD_GETLKPID:
1186                 if (fp->f_type != DTYPE_VNODE) {
1187                         error = EBADF;
1188                         goto out;
1189                 }
1190                 vp = (struct vnode *)fp->f_data;
1191
1192                 offset = fp->f_offset;
1193                 proc_fdunlock(p);
1194
1195                 /* Copy in the lock structure */
1196                 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1197                 if (error) {
1198                         goto outdrop;
1199                 }
1200
1201                 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1202                 /* and ending byte for EOVERFLOW in SEEK_SET */
1203                 error = check_file_seek_range(&fl, offset);
1204                 if (error) {
1205                         goto outdrop;
1206                 }
1207
1208                 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1209                         error = EINVAL;
1210                         goto outdrop;
1211                 }
1212
1213                 switch (fl.l_type) {
1214                 case F_RDLCK:
1215                 case F_UNLCK:
1216                 case F_WRLCK:
1217                         break;
1218                 default:
1219                         error = EINVAL;
1220                         goto outdrop;
1221                 }
1222
1223                 switch (fl.l_whence) {
1224                 case SEEK_CUR:
1225                 case SEEK_SET:
1226                 case SEEK_END:
1227                         break;
1228                 default:
1229                         error = EINVAL;
1230                         goto outdrop;
1231                 }
1232
1233                 if ((error = vnode_getwithref(vp)) == 0) {
1234                         if (fl.l_whence == SEEK_CUR) {
1235                                 fl.l_start += offset;
1236                         }
1237
1238 #if CONFIG_MACF
1239                         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
1240                             uap->cmd, &fl);
1241                         if (error == 0)
1242 #endif
1243                         switch (uap->cmd) {
1244                         case F_OFD_GETLK:
1245                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1246                                     F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1247                                 break;
1248                         case F_OFD_GETLKPID:
1249                                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob,
1250                                     F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1251                                 break;
1252                         default:
1253                                 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1254                                     uap->cmd, &fl, F_POSIX, &context, NULL);
1255                                 break;
1256                         }
1257
1258                         (void)vnode_put(vp);
1259
1260                         if (error == 0) {
1261                                 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1262                         }
1263                 }
1264                 goto outdrop;
1265
1266         case F_PREALLOCATE: {
1267                 fstore_t alloc_struct;    /* structure for allocate command */
1268                 u_int32_t alloc_flags = 0;
1269
1270                 if (fp->f_type != DTYPE_VNODE) {
1271                         error = EBADF;
1272                         goto out;
1273                 }
1274
1275                 vp = (struct vnode *)fp->f_data;
1276                 proc_fdunlock(p);
1277
1278                 /* make sure that we have write permission */
1279                 if ((fp->f_flag & FWRITE) == 0) {
1280                         error = EBADF;
1281                         goto outdrop;
1282                 }
1283
1284                 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1285                 if (error) {
1286                         goto outdrop;
1287                 }
1288
1289                 /* now set the space allocated to 0 */
1290                 alloc_struct.fst_bytesalloc = 0;
1291
1292                 /*
1293                  * Do some simple parameter checking
1294                  */
1295
1296                 /* set up the flags */
1297
1298                 alloc_flags |= PREALLOCATE;
1299
1300                 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1301                         alloc_flags |= ALLOCATECONTIG;
1302                 }
1303
1304                 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1305                         alloc_flags |= ALLOCATEALL;
1306                 }
1307
1308                 /*
1309                  * Do any position mode specific stuff.  The position
1310                  * modes supported now are F_PEOFPOSMODE and F_VOLPOSMODE.
1311                  */
1312
1313                 switch (alloc_struct.fst_posmode) {
1314                 case F_PEOFPOSMODE:
1315                         if (alloc_struct.fst_offset != 0) {
1316                                 error = EINVAL;
1317                                 goto outdrop;
1318                         }
1319
1320                         alloc_flags |= ALLOCATEFROMPEOF;
1321                         break;
1322
1323                 case F_VOLPOSMODE:
1324                         if (alloc_struct.fst_offset <= 0) {
1325                                 error = EINVAL;
1326                                 goto outdrop;
1327                         }
1328
1329                         alloc_flags |= ALLOCATEFROMVOL;
1330                         break;
1331
1332                 default: {
1333                         error = EINVAL;
1334                         goto outdrop;
1335                 }
1336                 }
1337                 if ((error = vnode_getwithref(vp)) == 0) {
1338                         /*
1339                          * call allocate to get the space
1340                          */
1341                         error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1342                             &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1343                             &context);
1344                         (void)vnode_put(vp);
1345
1346                         error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1347
1348                         if (error == 0) {
1349                                 error = error2;
1350                         }
1351                 }
1352                 goto outdrop;
1353         }
1354         case F_PUNCHHOLE: {
1355                 fpunchhole_t args;
1356
1357                 if (fp->f_type != DTYPE_VNODE) {
1358                         error = EBADF;
1359                         goto out;
1360                 }
1361
1362                 vp = (struct vnode *)fp->f_data;
1363                 proc_fdunlock(p);
1364
1365                 /* need write permissions */
1366                 if ((fp->f_flag & FWRITE) == 0) {
1367                         error = EPERM;
1368                         goto outdrop;
1369                 }
1370
1371                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1372                         goto outdrop;
1373                 }
1374
1375                 if ((error = vnode_getwithref(vp))) {
1376                         goto outdrop;
1377                 }
1378
1379 #if CONFIG_MACF
1380                 if ((error = mac_vnode_check_write(&context, fp->f_fglob->fg_cred, vp))) {
1381                         (void)vnode_put(vp);
1382                         goto outdrop;
1383                 }
1384 #endif
1385
1386                 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1387                 (void)vnode_put(vp);
1388
1389                 goto outdrop;
1390         }
1391         case F_TRIM_ACTIVE_FILE: {
1392                 ftrimactivefile_t args;
1393
1394                 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1395                         error = EACCES;
1396                         goto out;
1397                 }
1398
1399                 if (fp->f_type != DTYPE_VNODE) {
1400                         error = EBADF;
1401                         goto out;
1402                 }
1403
1404                 vp = (struct vnode *)fp->f_data;
1405                 proc_fdunlock(p);
1406
1407                 /* need write permissions */
1408                 if ((fp->f_flag & FWRITE) == 0) {
1409                         error = EPERM;
1410                         goto outdrop;
1411                 }
1412
1413                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1414                         goto outdrop;
1415                 }
1416
1417                 if ((error = vnode_getwithref(vp))) {
1418                         goto outdrop;
1419                 }
1420
1421                 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1422                 (void)vnode_put(vp);
1423
1424                 goto outdrop;
1425         }
1426         case F_SPECULATIVE_READ: {
1427                 fspecread_t args;
1428
1429                 if (fp->f_type != DTYPE_VNODE) {
1430                         error = EBADF;
1431                         goto out;
1432                 }
1433
1434                 vp = (struct vnode *)fp->f_data;
1435                 proc_fdunlock(p);
1436
1437                 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1438                         goto outdrop;
1439                 }
1440
1441                 /* Discard invalid offsets or lengths */
1442                 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1443                         error = EINVAL;
1444                         goto outdrop;
1445                 }
1446
1447                 /*
1448                  * Round the file offset down to a page-size boundary (or to 0).
1449                  * The filesystem will need to round the length up to the end of the page boundary
1450                  * or to the EOF of the file.
1451                  */
1452                 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1453                 uint64_t foff_delta = args.fsr_offset - foff;
1454                 args.fsr_offset = (off_t) foff;
1455
1456                 /*
1457                  * Now add in the delta to the supplied length. Since we may have adjusted the
1458                  * offset, increase it by the amount that we adjusted.
1459                  */
1460                 args.fsr_length += foff_delta;
1461
1462                 if ((error = vnode_getwithref(vp))) {
1463                         goto outdrop;
1464                 }
1465                 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1466                 (void)vnode_put(vp);
1467
1468                 goto outdrop;
1469         }
1470         case F_SETSIZE:
1471                 if (fp->f_type != DTYPE_VNODE) {
1472                         error = EBADF;
1473                         goto out;
1474                 }
1475                 vp = (struct vnode *)fp->f_data;
1476                 proc_fdunlock(p);
1477
1478                 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1479                 if (error) {
1480                         goto outdrop;
1481                 }
1482                 AUDIT_ARG(value64, offset);
1483
1484                 error = vnode_getwithref(vp);
1485                 if (error) {
1486                         goto outdrop;
1487                 }
1488
1489 #if CONFIG_MACF
1490                 error = mac_vnode_check_truncate(&context,
1491                     fp->f_fglob->fg_cred, vp);
1492                 if (error) {
1493                         (void)vnode_put(vp);
1494                         goto outdrop;
1495                 }
1496 #endif
1497                 /*
1498                  * Make sure that we are root.  Growing a file
1499                  * without zero filling the data is a security hole;
1500                  * root would have access anyway, so we'll allow it.
1501                  */
1502                 if (!kauth_cred_issuser(kauth_cred_get())) {
1503                         error = EACCES;
1504                 } else {
1505                         /*
1506                          * set the file size
1507                          */
1508                         error = vnode_setsize(vp, offset, IO_NOZEROFILL,
1509                             &context);
1510
1511 #if CONFIG_MACF
1512                         if (error == 0) {
1513                                 mac_vnode_notify_truncate(&context, fp->f_fglob->fg_cred, vp);
1514                         }
1515 #endif
1516                 }
1517
1518                 (void)vnode_put(vp);
1519                 goto outdrop;
1520
1521         case F_RDAHEAD:
1522                 if (fp->f_type != DTYPE_VNODE) {
1523                         error = EBADF;
1524                         goto out;
1525                 }
1526                 if (uap->arg) {
1527                         fp->f_fglob->fg_flag &= ~FNORDAHEAD;
1528                 } else {
1529                         fp->f_fglob->fg_flag |= FNORDAHEAD;
1530                 }
1531
1532                 goto out;
1533
1534         case F_NOCACHE:
1535                 if (fp->f_type != DTYPE_VNODE) {
1536                         error = EBADF;
1537                         goto out;
1538                 }
1539                 if (uap->arg) {
1540                         fp->f_fglob->fg_flag |= FNOCACHE;
1541                 } else {
1542                         fp->f_fglob->fg_flag &= ~FNOCACHE;
1543                 }
1544
1545                 goto out;
1546
1547         case F_NODIRECT:
1548                 if (fp->f_type != DTYPE_VNODE) {
1549                         error = EBADF;
1550                         goto out;
1551                 }
1552                 if (uap->arg) {
1553                         fp->f_fglob->fg_flag |= FNODIRECT;
1554                 } else {
1555                         fp->f_fglob->fg_flag &= ~FNODIRECT;
1556                 }
1557
1558                 goto out;
1559
1560         case F_SINGLE_WRITER:
1561                 if (fp->f_type != DTYPE_VNODE) {
1562                         error = EBADF;
1563                         goto out;
1564                 }
1565                 if (uap->arg) {
1566                         fp->f_fglob->fg_flag |= FSINGLE_WRITER;
1567                 } else {
1568                         fp->f_fglob->fg_flag &= ~FSINGLE_WRITER;
1569                 }
1570
1571                 goto out;
1572
1573         case F_GLOBAL_NOCACHE:
1574                 if (fp->f_type != DTYPE_VNODE) {
1575                         error = EBADF;
1576                         goto out;
1577                 }
1578                 vp = (struct vnode *)fp->f_data;
1579                 proc_fdunlock(p);
1580
1581                 if ((error = vnode_getwithref(vp)) == 0) {
1582                         *retval = vnode_isnocache(vp);
1583
1584                         if (uap->arg) {
1585                                 vnode_setnocache(vp);
1586                         } else {
1587                                 vnode_clearnocache(vp);
1588                         }
1589
1590                         (void)vnode_put(vp);
1591                 }
1592                 goto outdrop;
1593
1594         case F_CHECK_OPENEVT:
1595                 if (fp->f_type != DTYPE_VNODE) {
1596                         error = EBADF;
1597                         goto out;
1598                 }
1599                 vp = (struct vnode *)fp->f_data;
1600                 proc_fdunlock(p);
1601
1602                 if ((error = vnode_getwithref(vp)) == 0) {
1603                         *retval = vnode_is_openevt(vp);
1604
1605                         if (uap->arg) {
1606                                 vnode_set_openevt(vp);
1607                         } else {
1608                                 vnode_clear_openevt(vp);
1609                         }
1610
1611                         (void)vnode_put(vp);
1612                 }
1613                 goto outdrop;
1614
1615         case F_RDADVISE: {
1616                 struct radvisory ra_struct;
1617
1618                 if (fp->f_type != DTYPE_VNODE) {
1619                         error = EBADF;
1620                         goto out;
1621                 }
1622                 vp = (struct vnode *)fp->f_data;
1623                 proc_fdunlock(p);
1624
1625                 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1626                         goto outdrop;
1627                 }
1628                 if ((error = vnode_getwithref(vp)) == 0) {
1629                         error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1630
1631                         (void)vnode_put(vp);
1632                 }
1633                 goto outdrop;
1634         }
1635
1636         case F_FLUSH_DATA:
1637
1638                 if (fp->f_type != DTYPE_VNODE) {
1639                         error = EBADF;
1640                         goto out;
1641                 }
1642                 vp = (struct vnode *)fp->f_data;
1643                 proc_fdunlock(p);
1644
1645                 if ((error = vnode_getwithref(vp)) == 0) {
1646                         error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1647
1648                         (void)vnode_put(vp);
1649                 }
1650                 goto outdrop;
1651
1652         case F_LOG2PHYS:
1653         case F_LOG2PHYS_EXT: {
1654                 struct log2phys l2p_struct = {};    /* structure for allocate command */
1655                 int devBlockSize;
1656
1657                 off_t file_offset = 0;
1658                 size_t a_size = 0;
1659                 size_t run = 0;
1660
1661                 if (uap->cmd == F_LOG2PHYS_EXT) {
1662                         error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1663                         if (error) {
1664                                 goto out;
1665                         }
1666                         file_offset = l2p_struct.l2p_devoffset;
1667                 } else {
1668                         file_offset = fp->f_offset;
1669                 }
1670                 if (fp->f_type != DTYPE_VNODE) {
1671                         error = EBADF;
1672                         goto out;
1673                 }
1674                 vp = (struct vnode *)fp->f_data;
1675                 proc_fdunlock(p);
1676                 if ((error = vnode_getwithref(vp))) {
1677                         goto outdrop;
1678                 }
1679                 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1680                 if (error) {
1681                         (void)vnode_put(vp);
1682                         goto outdrop;
1683                 }
1684                 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1685                 if (error) {
1686                         (void)vnode_put(vp);
1687                         goto outdrop;
1688                 }
1689                 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1690                 if (uap->cmd == F_LOG2PHYS_EXT) {
1691                         if (l2p_struct.l2p_contigbytes < 0) {
1692                                 vnode_put(vp);
1693                                 error = EINVAL;
1694                                 goto outdrop;
1695                         }
1696
1697                         a_size = MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1698                 } else {
1699                         a_size = devBlockSize;
1700                 }
1701
1702                 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1703
1704                 (void)vnode_put(vp);
1705
1706                 if (!error) {
1707                         l2p_struct.l2p_flags = 0;       /* for now */
1708                         if (uap->cmd == F_LOG2PHYS_EXT) {
1709                                 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1710                         } else {
1711                                 l2p_struct.l2p_contigbytes = 0; /* for now */
1712                         }
1713
1714                         /*
1715                          * The block number being -1 suggests that the file offset is not backed
1716                          * by any real blocks on-disk.  As a result, just let it be passed back up wholesale.
1717                          */
1718                         if (bn == -1) {
1719                                 /* Don't multiply it by the block size */
1720                                 l2p_struct.l2p_devoffset = bn;
1721                         } else {
1722                                 l2p_struct.l2p_devoffset = bn * devBlockSize;
1723                                 l2p_struct.l2p_devoffset += file_offset - offset;
1724                         }
1725                         error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1726                 }
1727                 goto outdrop;
1728         }
1729         case F_GETPATH:
1730         case F_GETPATH_NOFIRMLINK: {
1731                 char *pathbufp;
1732                 int pathlen;
1733
1734                 if (fp->f_type != DTYPE_VNODE) {
1735                         error = EBADF;
1736                         goto out;
1737                 }
1738                 vp = (struct vnode *)fp->f_data;
1739                 proc_fdunlock(p);
1740
1741                 pathlen = MAXPATHLEN;
1742                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1743                 if (pathbufp == NULL) {
1744                         error = ENOMEM;
1745                         goto outdrop;
1746                 }
1747                 if ((error = vnode_getwithref(vp)) == 0) {
1748                         if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1749                                 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1750                         } else {
1751                                 error = vn_getpath(vp, pathbufp, &pathlen);
1752                         }
1753                         (void)vnode_put(vp);
1754
1755                         if (error == 0) {
1756                                 error = copyout((caddr_t)pathbufp, argp, pathlen);
1757                         }
1758                 }
1759                 FREE(pathbufp, M_TEMP);
1760                 goto outdrop;
1761         }
1762
1763         case F_PATHPKG_CHECK: {
1764                 char *pathbufp;
1765                 size_t pathlen;
1766
1767                 if (fp->f_type != DTYPE_VNODE) {
1768                         error = EBADF;
1769                         goto out;
1770                 }
1771                 vp = (struct vnode *)fp->f_data;
1772                 proc_fdunlock(p);
1773
1774                 pathlen = MAXPATHLEN;
1775                 pathbufp = kalloc(MAXPATHLEN);
1776
1777                 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1778                         if ((error = vnode_getwithref(vp)) == 0) {
1779                                 AUDIT_ARG(text, pathbufp);
1780                                 error = vn_path_package_check(vp, pathbufp, pathlen, retval);
1781
1782                                 (void)vnode_put(vp);
1783                         }
1784                 }
1785                 kfree(pathbufp, MAXPATHLEN);
1786                 goto outdrop;
1787         }
1788
1789         case F_CHKCLEAN:   // used by regression tests to see if all dirty pages got cleaned by fsync()
1790         case F_FULLFSYNC:  // fsync + flush the journal + DKIOCSYNCHRONIZE
1791         case F_BARRIERFSYNC:  // fsync + barrier
1792         case F_FREEZE_FS:  // freeze all other fs operations for the fs of this fd
1793         case F_THAW_FS: {  // thaw all frozen fs operations for the fs of this fd
1794                 if (fp->f_type != DTYPE_VNODE) {
1795                         error = EBADF;
1796                         goto out;
1797                 }
1798                 vp = (struct vnode *)fp->f_data;
1799                 proc_fdunlock(p);
1800
1801                 if ((error = vnode_getwithref(vp)) == 0) {
1802                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1803
1804                         (void)vnode_put(vp);
1805                 }
1806                 break;
1807         }
1808
1809         /*
1810          * SPI (private) for opening a file starting from a dir fd
1811          */
1812         case F_OPENFROM: {
1813                 struct user_fopenfrom fopen;
1814                 struct vnode_attr va;
1815                 struct nameidata nd;
1816                 int cmode;
1817
1818                 /* Check if this isn't a valid file descriptor */
1819                 if ((fp->f_type != DTYPE_VNODE) ||
1820                     (fp->f_flag & FREAD) == 0) {
1821                         error = EBADF;
1822                         goto out;
1823                 }
1824                 vp = (struct vnode *)fp->f_data;
1825                 proc_fdunlock(p);
1826
1827                 if (vnode_getwithref(vp)) {
1828                         error = ENOENT;
1829                         goto outdrop;
1830                 }
1831
1832                 /* Only valid for directories */
1833                 if (vp->v_type != VDIR) {
1834                         vnode_put(vp);
1835                         error = ENOTDIR;
1836                         goto outdrop;
1837                 }
1838
1839                 /* Get flags, mode and pathname arguments. */
1840                 if (IS_64BIT_PROCESS(p)) {
1841                         error = copyin(argp, &fopen, sizeof(fopen));
1842                 } else {
1843                         struct user32_fopenfrom fopen32;
1844
1845                         error = copyin(argp, &fopen32, sizeof(fopen32));
1846                         fopen.o_flags = fopen32.o_flags;
1847                         fopen.o_mode = fopen32.o_mode;
1848                         fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
1849                 }
1850                 if (error) {
1851                         vnode_put(vp);
1852                         goto outdrop;
1853                 }
1854                 AUDIT_ARG(fflags, fopen.o_flags);
1855                 AUDIT_ARG(mode, fopen.o_mode);
1856                 VATTR_INIT(&va);
1857                 /* Mask off all but regular access permissions */
1858                 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1859                 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
1860
1861                 /* Start the lookup relative to the file descriptor's vnode. */
1862                 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
1863                     fopen.o_pathname, &context);
1864                 nd.ni_dvp = vp;
1865
1866                 error = open1(&context, &nd, fopen.o_flags, &va,
1867                     fileproc_alloc_init, NULL, retval);
1868
1869                 vnode_put(vp);
1870                 break;
1871         }
1872         /*
1873          * SPI (private) for unlinking a file starting from a dir fd
1874          */
1875         case F_UNLINKFROM: {
1876                 user_addr_t pathname;
1877
1878                 /* Check if this isn't a valid file descriptor */
1879                 if ((fp->f_type != DTYPE_VNODE) ||
1880                     (fp->f_flag & FREAD) == 0) {
1881                         error = EBADF;
1882                         goto out;
1883                 }
1884                 vp = (struct vnode *)fp->f_data;
1885                 proc_fdunlock(p);
1886
1887                 if (vnode_getwithref(vp)) {
1888                         error = ENOENT;
1889                         goto outdrop;
1890                 }
1891
1892                 /* Only valid for directories */
1893                 if (vp->v_type != VDIR) {
1894                         vnode_put(vp);
1895                         error = ENOTDIR;
1896                         goto outdrop;
1897                 }
1898
1899                 /* Get the pathname argument. */
1900                 if (IS_64BIT_PROCESS(p)) {
1901                         pathname = (user_addr_t)argp;
1902                 } else {
1903                         pathname = CAST_USER_ADDR_T(argp);
1904                 }
1905
1906                 /* Start the lookup relative to the file descriptor's vnode. */
1907                 error = unlink1(&context, vp, pathname, UIO_USERSPACE, 0);
1908
1909                 vnode_put(vp);
1910                 break;
1911         }
1912
1913         case F_ADDSIGS:
1914         case F_ADDFILESIGS:
1915         case F_ADDFILESIGS_FOR_DYLD_SIM:
1916         case F_ADDFILESIGS_RETURN:
1917         {
1918                 struct cs_blob *blob = NULL;
1919                 struct user_fsignatures fs;
1920                 kern_return_t kr;
1921                 vm_offset_t kernel_blob_addr;
1922                 vm_size_t kernel_blob_size;
1923                 int blob_add_flags = 0;
1924
1925                 if (fp->f_type != DTYPE_VNODE) {
1926                         error = EBADF;
1927                         goto out;
1928                 }
1929                 vp = (struct vnode *)fp->f_data;
1930                 proc_fdunlock(p);
1931
1932                 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1933                         blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
1934                         if ((p->p_csflags & CS_KILL) == 0) {
1935                                 proc_lock(p);
1936                                 p->p_csflags |= CS_KILL;
1937                                 proc_unlock(p);
1938                         }
1939                 }
1940
1941                 error = vnode_getwithref(vp);
1942                 if (error) {
1943                         goto outdrop;
1944                 }
1945
1946                 if (IS_64BIT_PROCESS(p)) {
1947                         error = copyin(argp, &fs, sizeof(fs));
1948                 } else {
1949                         struct user32_fsignatures fs32;
1950
1951                         error = copyin(argp, &fs32, sizeof(fs32));
1952                         fs.fs_file_start = fs32.fs_file_start;
1953                         fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
1954                         fs.fs_blob_size = fs32.fs_blob_size;
1955                 }
1956
1957                 if (error) {
1958                         vnode_put(vp);
1959                         goto outdrop;
1960                 }
1961
1962                 /*
1963                  * First check if we have something loaded at this offset
1964                  */
1965                 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start);
1966                 if (blob != NULL) {
1967                         /* If this is for dyld_sim revalidate the blob */
1968                         if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
1969                                 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags);
1970                                 if (error) {
1971                                         blob = NULL;
1972                                         if (error != EAGAIN) {
1973                                                 vnode_put(vp);
1974                                                 goto outdrop;
1975                                         }
1976                                 }
1977                         }
1978                 }
1979
1980                 if (blob == NULL) {
1981                         /*
1982                          * An arbitrary limit, to prevent someone from mapping in a 20GB blob.  This should cover
1983                          * our use cases for the immediate future, but note that at the time of this commit, some
1984                          * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
1985                          *
1986                          * We should consider how we can manage this more effectively; the above means that some
1987                          * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
1988                          * threshold considered ridiculous at the time of this change.
1989                          */
1990 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
1991                         if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
1992                                 error = E2BIG;
1993                                 vnode_put(vp);
1994                                 goto outdrop;
1995                         }
1996
1997                         kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
1998                         kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
1999                         if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2000                                 error = ENOMEM;
2001                                 vnode_put(vp);
2002                                 goto outdrop;
2003                         }
2004
2005                         if (uap->cmd == F_ADDSIGS) {
2006                                 error = copyin(fs.fs_blob_start,
2007                                     (void *) kernel_blob_addr,
2008                                     fs.fs_blob_size);
2009                         } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM */
2010                                 int resid;
2011
2012                                 error = vn_rdwr(UIO_READ,
2013                                     vp,
2014                                     (caddr_t) kernel_blob_addr,
2015                                     kernel_blob_size,
2016                                     fs.fs_file_start + fs.fs_blob_start,
2017                                     UIO_SYSSPACE,
2018                                     0,
2019                                     kauth_cred_get(),
2020                                     &resid,
2021                                     p);
2022                                 if ((error == 0) && resid) {
2023                                         /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2024                                         memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2025                                 }
2026                         }
2027
2028                         if (error) {
2029                                 ubc_cs_blob_deallocate(kernel_blob_addr,
2030                                     kernel_blob_size);
2031                                 vnode_put(vp);
2032                                 goto outdrop;
2033                         }
2034
2035                         blob = NULL;
2036                         error = ubc_cs_blob_add(vp,
2037                             CPU_TYPE_ANY,                       /* not for a specific architecture */
2038                             fs.fs_file_start,
2039                             &kernel_blob_addr,
2040                             kernel_blob_size,
2041                             NULL,
2042                             blob_add_flags,
2043                             &blob);
2044
2045                         /* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
2046                         if (error) {
2047                                 if (kernel_blob_addr) {
2048                                         ubc_cs_blob_deallocate(kernel_blob_addr,
2049                                             kernel_blob_size);
2050                                 }
2051                                 vnode_put(vp);
2052                                 goto outdrop;
2053                         } else {
2054 #if CHECK_CS_VALIDATION_BITMAP
2055                                 ubc_cs_validation_bitmap_allocate( vp );
2056 #endif
2057                         }
2058                 }
2059
2060                 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2061                         /*
2062                          * The first element of the structure is an
2063                          * off_t that happens to have the same size for
2064                          * all archs. Let's overwrite that.
2065                          */
2066                         off_t end_offset = 0;
2067                         if (blob) {
2068                                 end_offset = blob->csb_end_offset;
2069                         }
2070                         error = copyout(&end_offset, argp, sizeof(end_offset));
2071                 }
2072
2073                 (void) vnode_put(vp);
2074                 break;
2075         }
2076         case F_GETCODEDIR:
2077         case F_FINDSIGS: {
2078                 error = ENOTSUP;
2079                 goto out;
2080         }
2081         case F_CHECK_LV: {
2082                 struct fileglob *fg;
2083                 fchecklv_t lv = {};
2084
2085                 if (fp->f_type != DTYPE_VNODE) {
2086                         error = EBADF;
2087                         goto out;
2088                 }
2089                 fg = fp->f_fglob;
2090                 proc_fdunlock(p);
2091
2092                 if (IS_64BIT_PROCESS(p)) {
2093                         error = copyin(argp, &lv, sizeof(lv));
2094                 } else {
2095                         struct user32_fchecklv lv32 = {};
2096
2097                         error = copyin(argp, &lv32, sizeof(lv32));
2098                         lv.lv_file_start = lv32.lv_file_start;
2099                         lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2100                         lv.lv_error_message_size = lv32.lv_error_message_size;
2101                 }
2102                 if (error) {
2103                         goto outdrop;
2104                 }
2105
2106 #if CONFIG_MACF
2107                 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2108                     (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2109 #endif
2110
2111                 break;
2112         }
2113 #if CONFIG_PROTECT
2114         case F_GETPROTECTIONCLASS: {
2115                 if (fp->f_type != DTYPE_VNODE) {
2116                         error = EBADF;
2117                         goto out;
2118                 }
2119                 vp = (struct vnode *)fp->f_data;
2120
2121                 proc_fdunlock(p);
2122
2123                 if (vnode_getwithref(vp)) {
2124                         error = ENOENT;
2125                         goto outdrop;
2126                 }
2127
2128                 struct vnode_attr va;
2129
2130                 VATTR_INIT(&va);
2131                 VATTR_WANTED(&va, va_dataprotect_class);
2132                 error = VNOP_GETATTR(vp, &va, &context);
2133                 if (!error) {
2134                         if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2135                                 *retval = va.va_dataprotect_class;
2136                         } else {
2137                                 error = ENOTSUP;
2138                         }
2139                 }
2140
2141                 vnode_put(vp);
2142                 break;
2143         }
2144
2145         case F_SETPROTECTIONCLASS: {
2146                 /* tmp must be a valid PROTECTION_CLASS_* */
2147                 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2148
2149                 if (fp->f_type != DTYPE_VNODE) {
2150                         error = EBADF;
2151                         goto out;
2152                 }
2153                 vp = (struct vnode *)fp->f_data;
2154
2155                 proc_fdunlock(p);
2156
2157                 if (vnode_getwithref(vp)) {
2158                         error = ENOENT;
2159                         goto outdrop;
2160                 }
2161
2162                 /* Only go forward if you have write access */
2163                 vfs_context_t ctx = vfs_context_current();
2164                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2165                         vnode_put(vp);
2166                         error = EBADF;
2167                         goto outdrop;
2168                 }
2169
2170                 struct vnode_attr va;
2171
2172                 VATTR_INIT(&va);
2173                 VATTR_SET(&va, va_dataprotect_class, tmp);
2174
2175                 error = VNOP_SETATTR(vp, &va, ctx);
2176
2177                 vnode_put(vp);
2178                 break;
2179         }
2180
2181         case F_TRANSCODEKEY: {
2182                 if (fp->f_type != DTYPE_VNODE) {
2183                         error = EBADF;
2184                         goto out;
2185                 }
2186
2187                 vp = (struct vnode *)fp->f_data;
2188                 proc_fdunlock(p);
2189
2190                 if (vnode_getwithref(vp)) {
2191                         error = ENOENT;
2192                         goto outdrop;
2193                 }
2194
2195                 cp_key_t k = {
2196                         .len = CP_MAX_WRAPPEDKEYSIZE,
2197                 };
2198
2199                 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2200
2201                 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2202
2203                 vnode_put(vp);
2204
2205                 if (error == 0) {
2206                         error = copyout(k.key, argp, k.len);
2207                         *retval = k.len;
2208                 }
2209
2210                 FREE(k.key, M_TEMP);
2211
2212                 break;
2213         }
2214
2215         case F_GETPROTECTIONLEVEL:  {
2216                 if (fp->f_type != DTYPE_VNODE) {
2217                         error = EBADF;
2218                         goto out;
2219                 }
2220
2221                 vp = (struct vnode*) fp->f_data;
2222                 proc_fdunlock(p);
2223
2224                 if (vnode_getwithref(vp)) {
2225                         error = ENOENT;
2226                         goto outdrop;
2227                 }
2228
2229                 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2230
2231                 vnode_put(vp);
2232                 break;
2233         }
2234
2235         case F_GETDEFAULTPROTLEVEL:  {
2236                 if (fp->f_type != DTYPE_VNODE) {
2237                         error = EBADF;
2238                         goto out;
2239                 }
2240
2241                 vp = (struct vnode*) fp->f_data;
2242                 proc_fdunlock(p);
2243
2244                 if (vnode_getwithref(vp)) {
2245                         error = ENOENT;
2246                         goto outdrop;
2247                 }
2248
2249                 /*
2250                  * if cp_get_major_vers fails, error will be set to proper errno
2251                  * and cp_version will still be 0.
2252                  */
2253
2254                 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2255
2256                 vnode_put(vp);
2257                 break;
2258         }
2259
2260 #endif /* CONFIG_PROTECT */
2261
2262         case F_MOVEDATAEXTENTS: {
2263                 struct fileproc *fp2 = NULL;
2264                 struct vnode *src_vp = NULLVP;
2265                 struct vnode *dst_vp = NULLVP;
2266                 /* We need to grab the 2nd FD out of the arguments before moving on. */
2267                 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2268
2269                 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2270                 if (error) {
2271                         goto out;
2272                 }
2273
2274                 if (fp->f_type != DTYPE_VNODE) {
2275                         error = EBADF;
2276                         goto out;
2277                 }
2278
2279                 /*
2280                  * For now, special case HFS+ and APFS only, since this
2281                  * is SPI.
2282                  */
2283                 src_vp = (struct vnode *)fp->f_data;
2284                 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2285                         error = ENOTSUP;
2286                         goto out;
2287                 }
2288
2289                 /*
2290                  * Get the references before we start acquiring iocounts on the vnodes,
2291                  * while we still hold the proc fd lock
2292                  */
2293                 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2294                         error = EBADF;
2295                         goto out;
2296                 }
2297                 if (fp2->f_type != DTYPE_VNODE) {
2298                         fp_drop(p, fd2, fp2, 1);
2299                         error = EBADF;
2300                         goto out;
2301                 }
2302                 dst_vp = (struct vnode *)fp2->f_data;
2303                 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2304                         fp_drop(p, fd2, fp2, 1);
2305                         error = ENOTSUP;
2306                         goto out;
2307                 }
2308
2309 #if CONFIG_MACF
2310                 /* Re-do MAC checks against the new FD, pass in a fake argument */
2311                 error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
2312                 if (error) {
2313                         fp_drop(p, fd2, fp2, 1);
2314                         goto out;
2315                 }
2316 #endif
2317                 /* Audit the 2nd FD */
2318                 AUDIT_ARG(fd, fd2);
2319
2320                 proc_fdunlock(p);
2321
2322                 if (vnode_getwithref(src_vp)) {
2323                         fp_drop(p, fd2, fp2, 0);
2324                         error = ENOENT;
2325                         goto outdrop;
2326                 }
2327                 if (vnode_getwithref(dst_vp)) {
2328                         vnode_put(src_vp);
2329                         fp_drop(p, fd2, fp2, 0);
2330                         error = ENOENT;
2331                         goto outdrop;
2332                 }
2333
2334                 /*
2335                  * Basic asserts; validate they are not the same and that
2336                  * both live on the same filesystem.
2337                  */
2338                 if (dst_vp == src_vp) {
2339                         vnode_put(src_vp);
2340                         vnode_put(dst_vp);
2341                         fp_drop(p, fd2, fp2, 0);
2342                         error = EINVAL;
2343                         goto outdrop;
2344                 }
2345
2346                 if (dst_vp->v_mount != src_vp->v_mount) {
2347                         vnode_put(src_vp);
2348                         vnode_put(dst_vp);
2349                         fp_drop(p, fd2, fp2, 0);
2350                         error = EXDEV;
2351                         goto outdrop;
2352                 }
2353
2354                 /* Now we have a legit pair of FDs.  Go to work */
2355
2356                 /* Now check for write access to the target files */
2357                 if (vnode_authorize(src_vp, NULLVP,
2358                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2359                         vnode_put(src_vp);
2360                         vnode_put(dst_vp);
2361                         fp_drop(p, fd2, fp2, 0);
2362                         error = EBADF;
2363                         goto outdrop;
2364                 }
2365
2366                 if (vnode_authorize(dst_vp, NULLVP,
2367                     (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2368                         vnode_put(src_vp);
2369                         vnode_put(dst_vp);
2370                         fp_drop(p, fd2, fp2, 0);
2371                         error = EBADF;
2372                         goto outdrop;
2373                 }
2374
2375                 /* Verify that both vps point to files and not directories */
2376                 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2377                         error = EINVAL;
2378                         vnode_put(src_vp);
2379                         vnode_put(dst_vp);
2380                         fp_drop(p, fd2, fp2, 0);
2381                         goto outdrop;
2382                 }
2383
2384                 /*
2385                  * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2386                  * We'll pass in our special bit indicating that the new behavior is expected
2387                  */
2388
2389                 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2390
2391                 vnode_put(src_vp);
2392                 vnode_put(dst_vp);
2393                 fp_drop(p, fd2, fp2, 0);
2394                 break;
2395         }
2396
2397         /*
2398          * SPI for making a file compressed.
2399          */
2400         case F_MAKECOMPRESSED: {
2401                 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2402
2403                 if (fp->f_type != DTYPE_VNODE) {
2404                         error = EBADF;
2405                         goto out;
2406                 }
2407
2408                 vp = (struct vnode*) fp->f_data;
2409                 proc_fdunlock(p);
2410
2411                 /* get the vnode */
2412                 if (vnode_getwithref(vp)) {
2413                         error = ENOENT;
2414                         goto outdrop;
2415                 }
2416
2417                 /* Is it a file? */
2418                 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2419                         vnode_put(vp);
2420                         error = EBADF;
2421                         goto outdrop;
2422                 }
2423
2424                 /* invoke ioctl to pass off to FS */
2425                 /* Only go forward if you have write access */
2426                 vfs_context_t ctx = vfs_context_current();
2427                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2428                         vnode_put(vp);
2429                         error = EBADF;
2430                         goto outdrop;
2431                 }
2432
2433                 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2434
2435                 vnode_put(vp);
2436                 break;
2437         }
2438
2439         /*
2440          * SPI (private) for indicating to a filesystem that subsequent writes to
2441          * the open FD will be written to the Fastflow.
2442          */
2443         case F_SET_GREEDY_MODE:
2444         /* intentionally drop through to the same handler as F_SETSTATICCONTENT.
2445          * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2446          */
2447
2448         /*
2449          * SPI (private) for indicating to a filesystem that subsequent writes to
2450          * the open FD will represent static content.
2451          */
2452         case F_SETSTATICCONTENT: {
2453                 caddr_t ioctl_arg = NULL;
2454
2455                 if (uap->arg) {
2456                         ioctl_arg = (caddr_t) 1;
2457                 }
2458
2459                 if (fp->f_type != DTYPE_VNODE) {
2460                         error = EBADF;
2461                         goto out;
2462                 }
2463                 vp = (struct vnode *)fp->f_data;
2464                 proc_fdunlock(p);
2465
2466                 error = vnode_getwithref(vp);
2467                 if (error) {
2468                         error = ENOENT;
2469                         goto outdrop;
2470                 }
2471
2472                 /* Only go forward if you have write access */
2473                 vfs_context_t ctx = vfs_context_current();
2474                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2475                         vnode_put(vp);
2476                         error = EBADF;
2477                         goto outdrop;
2478                 }
2479
2480                 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2481                 (void)vnode_put(vp);
2482
2483                 break;
2484         }
2485
2486         /*
2487          * SPI (private) for indicating to the lower level storage driver that the
2488          * subsequent writes should be of a particular IO type (burst, greedy, static),
2489          * or other flavors that may be necessary.
2490          */
2491         case F_SETIOTYPE: {
2492                 caddr_t param_ptr;
2493                 uint32_t param;
2494
2495                 if (uap->arg) {
2496                         /* extract 32 bits of flags from userland */
2497                         param_ptr = (caddr_t) uap->arg;
2498                         param = (uint32_t) param_ptr;
2499                 } else {
2500                         /* If no argument is specified, error out */
2501                         error = EINVAL;
2502                         goto out;
2503                 }
2504
2505                 /*
2506                  * Validate the different types of flags that can be specified:
2507                  * all of them are mutually exclusive for now.
2508                  */
2509                 switch (param) {
2510                 case F_IOTYPE_ISOCHRONOUS:
2511                         break;
2512
2513                 default:
2514                         error = EINVAL;
2515                         goto out;
2516                 }
2517
2518
2519                 if (fp->f_type != DTYPE_VNODE) {
2520                         error = EBADF;
2521                         goto out;
2522                 }
2523                 vp = (struct vnode *)fp->f_data;
2524                 proc_fdunlock(p);
2525
2526                 error = vnode_getwithref(vp);
2527                 if (error) {
2528                         error = ENOENT;
2529                         goto outdrop;
2530                 }
2531
2532                 /* Only go forward if you have write access */
2533                 vfs_context_t ctx = vfs_context_current();
2534                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2535                         vnode_put(vp);
2536                         error = EBADF;
2537                         goto outdrop;
2538                 }
2539
2540                 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2541                 (void)vnode_put(vp);
2542
2543                 break;
2544         }
2545
2546         /*
2547          * Set the vnode pointed to by 'fd'
2548          * and tag it as the (potentially future) backing store
2549          * for another filesystem
2550          */
2551         case F_SETBACKINGSTORE: {
2552                 if (fp->f_type != DTYPE_VNODE) {
2553                         error = EBADF;
2554                         goto out;
2555                 }
2556
2557                 vp = (struct vnode *)fp->f_data;
2558
2559                 if (vp->v_tag != VT_HFS) {
2560                         error = EINVAL;
2561                         goto out;
2562                 }
2563                 proc_fdunlock(p);
2564
2565                 if (vnode_getwithref(vp)) {
2566                         error = ENOENT;
2567                         goto outdrop;
2568                 }
2569
2570                 /* only proceed if you have write access */
2571                 vfs_context_t ctx = vfs_context_current();
2572                 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2573                         vnode_put(vp);
2574                         error = EBADF;
2575                         goto outdrop;
2576                 }
2577
2578
2579                 /* If arg != 0, set, otherwise unset */
2580                 if (uap->arg) {
2581                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2582                 } else {
2583                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
2584                 }
2585
2586                 vnode_put(vp);
2587                 break;
2588         }
2589
2590         /*
2591          * like F_GETPATH, but special semantics for
2592          * the mobile time machine handler.
2593          */
2594         case F_GETPATH_MTMINFO: {
2595                 char *pathbufp;
2596                 int pathlen;
2597
2598                 if (fp->f_type != DTYPE_VNODE) {
2599                         error = EBADF;
2600                         goto out;
2601                 }
2602                 vp = (struct vnode *)fp->f_data;
2603                 proc_fdunlock(p);
2604
2605                 pathlen = MAXPATHLEN;
2606                 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
2607                 if (pathbufp == NULL) {
2608                         error = ENOMEM;
2609                         goto outdrop;
2610                 }
2611                 if ((error = vnode_getwithref(vp)) == 0) {
2612                         int backingstore = 0;
2613
2614                         /* Check for error from vn_getpath before moving on */
2615                         if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
2616                                 if (vp->v_tag == VT_HFS) {
2617                                         error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
2618                                 }
2619                                 (void)vnode_put(vp);
2620
2621                                 if (error == 0) {
2622                                         error = copyout((caddr_t)pathbufp, argp, pathlen);
2623                                 }
2624                                 if (error == 0) {
2625                                         /*
2626                                          * If the copyout was successful, now check to ensure
2627                                          * that this vnode is not a BACKINGSTORE vnode.  mtmd
2628                                          * wants the path regardless.
2629                                          */
2630                                         if (backingstore) {
2631                                                 error = EBUSY;
2632                                         }
2633                                 }
2634                         } else {
2635                                 (void)vnode_put(vp);
2636                         }
2637                 }
2638                 FREE(pathbufp, M_TEMP);
2639                 goto outdrop;
2640         }
2641
2642 #if DEBUG || DEVELOPMENT
2643         case F_RECYCLE:
2644                 if (fp->f_type != DTYPE_VNODE) {
2645                         error = EBADF;
2646                         goto out;
2647                 }
2648                 vp = (struct vnode *)fp->f_data;
2649                 proc_fdunlock(p);
2650
2651                 vnode_recycle(vp);
2652                 break;
2653 #endif
2654
2655         default:
2656                 /*
2657                  * This is an fcntl() that we do not recognize at this level;
2658                  * if this is a vnode, we send it down into the VNOP_IOCTL
2659                  * for this vnode; this can include special devices, and will
2660                  * effectively overload fcntl() to send ioctl()'s.
2661                  */
2662                 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
2663                         error = EINVAL;
2664                         goto out;
2665                 }
2666
2667                 /* Catch any now-invalid fcntl() selectors */
2668                 switch (uap->cmd) {
2669                 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
2670                 case (int)FSIOC_FIOSEEKHOLE:
2671                 case (int)FSIOC_FIOSEEKDATA:
2672                 case (int)FSIOC_CAS_BSDFLAGS:
2673                 case HFS_GET_BOOT_INFO:
2674                 case HFS_SET_BOOT_INFO:
2675                 case FIOPINSWAP:
2676                 case F_MARKDEPENDENCY:
2677                 case TIOCREVOKE:
2678                         error = EINVAL;
2679                         goto out;
2680                 default:
2681                         break;
2682                 }
2683
2684                 if (fp->f_type != DTYPE_VNODE) {
2685                         error = EBADF;
2686                         goto out;
2687                 }
2688                 vp = (struct vnode *)fp->f_data;
2689                 proc_fdunlock(p);
2690
2691                 if ((error = vnode_getwithref(vp)) == 0) {
2692 #define STK_PARAMS 128
2693                         char stkbuf[STK_PARAMS] = {0};
2694                         unsigned int size;
2695                         caddr_t data, memp;
2696                         /*
2697                          * For this to work properly, we have to copy in the
2698                          * ioctl() cmd argument if there is one; we must also
2699                          * check that a command parameter, if present, does
2700                          * not exceed the maximum command length dictated by
2701                          * the number of bits we have available in the command
2702                          * to represent a structure length.  Finally, we have
2703                          * to copy the results back out, if it is that type of
2704                          * ioctl().
2705                          */
2706                         size = IOCPARM_LEN(uap->cmd);
2707                         if (size > IOCPARM_MAX) {
2708                                 (void)vnode_put(vp);
2709                                 error = EINVAL;
2710                                 break;
2711                         }
2712
2713                         memp = NULL;
2714                         if (size > sizeof(stkbuf)) {
2715                                 if ((memp = (caddr_t)kalloc(size)) == 0) {
2716                                         (void)vnode_put(vp);
2717                                         error = ENOMEM;
2718                                         goto outdrop;
2719                                 }
2720                                 data = memp;
2721                         } else {
2722                                 data = &stkbuf[0];
2723                         }
2724
2725                         if (uap->cmd & IOC_IN) {
2726                                 if (size) {
2727                                         /* structure */
2728                                         error = copyin(argp, data, size);
2729                                         if (error) {
2730                                                 (void)vnode_put(vp);
2731                                                 if (memp) {
2732                                                         kfree(memp, size);
2733                                                 }
2734                                                 goto outdrop;
2735                                         }
2736
2737                                         /* Bzero the section beyond that which was needed */
2738                                         if (size <= sizeof(stkbuf)) {
2739                                                 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
2740                                         }
2741                                 } else {
2742                                         /* int */
2743                                         if (is64bit) {
2744                                                 *(user_addr_t *)data = argp;
2745                                         } else {
2746                                                 *(uint32_t *)data = (uint32_t)argp;
2747                                         }
2748                                 };
2749                         } else if ((uap->cmd & IOC_OUT) && size) {
2750                                 /*
2751                                  * Zero the buffer so the user always
2752                                  * gets back something deterministic.
2753                                  */
2754                                 bzero(data, size);
2755                         } else if (uap->cmd & IOC_VOID) {
2756                                 if (is64bit) {
2757                                         *(user_addr_t *)data = argp;
2758                                 } else {
2759                                         *(uint32_t *)data = (uint32_t)argp;
2760                                 }
2761                         }
2762
2763                         error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
2764
2765                         (void)vnode_put(vp);
2766
2767                         /* Copy any output data to user */
2768                         if (error == 0 && (uap->cmd & IOC_OUT) && size) {
2769                                 error = copyout(data, argp, size);
2770                         }
2771                         if (memp) {
2772                                 kfree(memp, size);
2773                         }
2774                 }
2775                 break;
2776         }
2777
2778 outdrop:
2779         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2780         fp_drop(p, fd, fp, 0);
2781         return error;
2782 out:
2783         fp_drop(p, fd, fp, 1);
2784         proc_fdunlock(p);
2785         return error;
2786 }
2787
2788
2789 /*
2790  * finishdup
2791  *
2792  * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
2793  *
2794  * Parameters:  p                               Process performing the dup
2795  *              old                             The fd to dup
2796  *              new                             The fd to dup it to
2797  *              fd_flags                        Flags to augment the new fd
2798  *              retval                          Pointer to the call return area
2799  *
2800  * Returns:     0                               Success
2801  *              EBADF
2802  *              ENOMEM
2803  *
2804  * Implicit returns:
2805  *              *retval (modified)              The new descriptor
2806  *
2807  * Locks:       Assumes proc_fdlock for process pointing to fdp is held by
2808  *              the caller
2809  *
2810  * Notes:       This function may drop and reacquire this lock; it is unsafe
2811  *              for a caller to assume that other state protected by the lock
2812  *              has not been subsequently changed out from under it.
2813  */
2814 int
2815 finishdup(proc_t p,
2816     struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
2817 {
2818         struct fileproc *nfp;
2819         struct fileproc *ofp;
2820 #if CONFIG_MACF
2821         int error;
2822 #endif
2823
2824 #if DIAGNOSTIC
2825         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2826 #endif
2827         if ((ofp = fdp->fd_ofiles[old]) == NULL ||
2828             (fdp->fd_ofileflags[old] & UF_RESERVED)) {
2829                 fdrelse(p, new);
2830                 return EBADF;
2831         }
2832         fg_ref(ofp);
2833
2834 #if CONFIG_MACF
2835         error = mac_file_check_dup(proc_ucred(p), ofp->f_fglob, new);
2836         if (error) {
2837                 fg_drop(ofp);
2838                 fdrelse(p, new);
2839                 return error;
2840         }
2841 #endif
2842
2843         proc_fdunlock(p);
2844
2845         nfp = fileproc_alloc_init(NULL);
2846
2847         proc_fdlock(p);
2848
2849         if (nfp == NULL) {
2850                 fg_drop(ofp);
2851                 fdrelse(p, new);
2852                 return ENOMEM;
2853         }
2854
2855         nfp->f_fglob = ofp->f_fglob;
2856
2857 #if DIAGNOSTIC
2858         if (fdp->fd_ofiles[new] != 0) {
2859                 panic("finishdup: overwriting fd_ofiles with new %d", new);
2860         }
2861         if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
2862                 panic("finishdup: unreserved fileflags with new %d", new);
2863         }
2864 #endif
2865
2866         if (new > fdp->fd_lastfile) {
2867                 fdp->fd_lastfile = new;
2868         }
2869         *fdflags(p, new) |= fd_flags;
2870         procfdtbl_releasefd(p, new, nfp);
2871         *retval = new;
2872         return 0;
2873 }
2874
2875
2876 /*
2877  * close
2878  *
2879  * Description: The implementation of the close(2) system call
2880  *
2881  * Parameters:  p                       Process in whose per process file table
2882  *                                      the close is to occur
2883  *              uap->fd                 fd to be closed
2884  *              retval                  <unused>
2885  *
2886  * Returns:     0                       Success
2887  *      fp_lookup:EBADF                 Bad file descriptor
2888  *      fp_guard_exception:???          Guarded file descriptor
2889  *      close_internal:EBADF
2890  *      close_internal:???              Anything returnable by a per-fileops
2891  *                                      close function
2892  */
2893 int
2894 close(proc_t p, struct close_args *uap, int32_t *retval)
2895 {
2896         __pthread_testcancel(1);
2897         return close_nocancel(p, (struct close_nocancel_args *)uap, retval);
2898 }
2899
2900
2901 int
2902 close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
2903 {
2904         struct fileproc *fp;
2905         int fd = uap->fd;
2906         int error;
2907
2908         AUDIT_SYSCLOSE(p, fd);
2909
2910         proc_fdlock(p);
2911
2912         if ((error = fp_lookup(p, fd, &fp, 1))) {
2913                 proc_fdunlock(p);
2914                 return error;
2915         }
2916
2917         if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
2918                 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
2919                 (void) fp_drop(p, fd, fp, 1);
2920                 proc_fdunlock(p);
2921                 return error;
2922         }
2923
2924         error = close_internal_locked(p, fd, fp, 0);
2925
2926         proc_fdunlock(p);
2927
2928         return error;
2929 }
2930
2931
2932 /*
2933  * close_internal_locked
2934  *
2935  * Close a file descriptor.
2936  *
2937  * Parameters:  p                       Process in whose per process file table
2938  *                                      the close is to occur
2939  *              fd                      fd to be closed
2940  *              fp                      fileproc associated with the fd
2941  *
2942  * Returns:     0                       Success
2943  *              EBADF                   fd already in close wait state
2944  *      closef_locked:???               Anything returnable by a per-fileops
2945  *                                      close function
2946  *
2947  * Locks:       Assumes proc_fdlock for process is held by the caller and returns
2948  *              with lock held
2949  *
2950  * Notes:       This function may drop and reacquire this lock; it is unsafe
2951  *              for a caller to assume that other state protected by the lock
2952  *              has not been subsequently changed out from under it.
2953  */
2954 int
2955 close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
2956 {
2957         struct filedesc *fdp = p->p_fd;
2958         int error = 0;
2959         int resvfd = flags & FD_DUP2RESV;
2960
2961
2962 #if DIAGNOSTIC
2963         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
2964 #endif
2965
2966         /* Keep people from using the filedesc while we are closing it */
2967         procfdtbl_markclosefd(p, fd);
2968
2969
2970         if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
2971                 panic("close_internal_locked: being called on already closing fd");
2972         }
2973
2974
2975 #if DIAGNOSTIC
2976         if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0) {
2977                 panic("close_internal: unreserved fileflags with fd %d", fd);
2978         }
2979 #endif
2980
2981         fp->f_flags |= FP_CLOSING;
2982
2983         if ((fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
2984                 proc_fdunlock(p);
2985
2986                 if ((fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
2987                         /*
2988                          * call out to allow 3rd party notification of close.
2989                          * Ignore result of kauth_authorize_fileop call.
2990                          */
2991                         if (vnode_getwithref((vnode_t)fp->f_data) == 0) {
2992                                 u_int   fileop_flags = 0;
2993                                 if ((fp->f_flags & FP_WRITTEN) != 0) {
2994                                         fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
2995                                 }
2996                                 kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE,
2997                                     (uintptr_t)fp->f_data, (uintptr_t)fileop_flags);
2998                                 vnode_put((vnode_t)fp->f_data);
2999                         }
3000                 }
3001                 if (fp->f_flags & FP_AIOISSUED) {
3002                         /*
3003                          * cancel all async IO requests that can be cancelled.
3004                          */
3005                         _aio_close( p, fd );
3006                 }
3007
3008                 proc_fdlock(p);
3009         }
3010
3011         if (fd < fdp->fd_knlistsize) {
3012                 knote_fdclose(p, fd);
3013         }
3014
3015         /* release the ref returned from fp_lookup before calling drain */
3016         (void) os_ref_release_locked(&fp->f_iocount);
3017         fileproc_drain(p, fp);
3018
3019         if (fp->f_flags & FP_WAITEVENT) {
3020                 (void)waitevent_close(p, fp);
3021         }
3022
3023         if (resvfd == 0) {
3024                 _fdrelse(p, fd);
3025         } else {
3026                 procfdtbl_reservefd(p, fd);
3027         }
3028
3029         if (ENTR_SHOULDTRACE && fp->f_type == DTYPE_SOCKET) {
3030                 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
3031                     fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
3032         }
3033
3034         error = closef_locked(fp, fp->f_fglob, p);
3035         if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE) {
3036                 wakeup(&fp->f_flags);
3037         }
3038         fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING);
3039
3040         proc_fdunlock(p);
3041
3042         fileproc_free(fp);
3043
3044         proc_fdlock(p);
3045
3046 #if DIAGNOSTIC
3047         if (resvfd != 0) {
3048                 if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0) {
3049                         panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
3050                 }
3051         }
3052 #endif
3053
3054         return error;
3055 }
3056
3057
3058 /*
3059  * fstat1
3060  *
3061  * Description: Return status information about a file descriptor.
3062  *
3063  * Parameters:  p                               The process doing the fstat
3064  *              fd                              The fd to stat
3065  *              ub                              The user stat buffer
3066  *              xsecurity                       The user extended security
3067  *                                              buffer, or 0 if none
3068  *              xsecurity_size                  The size of xsecurity, or 0
3069  *                                              if no xsecurity
3070  *              isstat64                        Flag to indicate 64 bit version
3071  *                                              for inode size, etc.
3072  *
3073  * Returns:     0                               Success
3074  *              EBADF
3075  *              EFAULT
3076  *      fp_lookup:EBADF                         Bad file descriptor
3077  *      vnode_getwithref:???
3078  *      copyout:EFAULT
3079  *      vnode_getwithref:???
3080  *      vn_stat:???
3081  *      soo_stat:???
3082  *      pipe_stat:???
3083  *      pshm_stat:???
3084  *      kqueue_stat:???
3085  *
3086  * Notes:       Internal implementation for all other fstat() related
3087  *              functions
3088  *
3089  *              XXX switch on node type is bogus; need a stat in struct
3090  *              XXX fileops instead.
3091  */
3092 static int
3093 fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3094 {
3095         struct fileproc *fp;
3096         union {
3097                 struct stat sb;
3098                 struct stat64 sb64;
3099         } source;
3100         union {
3101                 struct user64_stat user64_sb;
3102                 struct user32_stat user32_sb;
3103                 struct user64_stat64 user64_sb64;
3104                 struct user32_stat64 user32_sb64;
3105         } dest;
3106         int error, my_size;
3107         file_type_t type;
3108         caddr_t data;
3109         kauth_filesec_t fsec;
3110         user_size_t xsecurity_bufsize;
3111         vfs_context_t ctx = vfs_context_current();
3112         void * sbptr;
3113
3114
3115         AUDIT_ARG(fd, fd);
3116
3117         if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
3118                 return error;
3119         }
3120         type = fp->f_type;
3121         data = fp->f_data;
3122         fsec = KAUTH_FILESEC_NONE;
3123
3124         sbptr = (void *)&source;
3125
3126         switch (type) {
3127         case DTYPE_VNODE:
3128                 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
3129                         /*
3130                          * If the caller has the file open, and is not
3131                          * requesting extended security information, we are
3132                          * going to let them get the basic stat information.
3133                          */
3134                         if (xsecurity == USER_ADDR_NULL) {
3135                                 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
3136                                     fp->f_fglob->fg_cred);
3137                         } else {
3138                                 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
3139                         }
3140
3141                         AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3142                         (void)vnode_put((vnode_t)data);
3143                 }
3144                 break;
3145
3146 #if SOCKETS
3147         case DTYPE_SOCKET:
3148                 error = soo_stat((struct socket *)data, sbptr, isstat64);
3149                 break;
3150 #endif /* SOCKETS */
3151
3152         case DTYPE_PIPE:
3153                 error = pipe_stat((void *)data, sbptr, isstat64);
3154                 break;
3155
3156         case DTYPE_PSXSHM:
3157                 error = pshm_stat((void *)data, sbptr, isstat64);
3158                 break;
3159
3160         case DTYPE_KQUEUE:
3161                 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3162                 break;
3163
3164         default:
3165                 error = EBADF;
3166                 goto out;
3167         }
3168         if (error == 0) {
3169                 caddr_t sbp;
3170
3171                 if (isstat64 != 0) {
3172                         source.sb64.st_lspare = 0;
3173                         source.sb64.st_qspare[0] = 0LL;
3174                         source.sb64.st_qspare[1] = 0LL;
3175
3176                         if (IS_64BIT_PROCESS(current_proc())) {
3177                                 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3178                                 my_size = sizeof(dest.user64_sb64);
3179                                 sbp = (caddr_t)&dest.user64_sb64;
3180                         } else {
3181                                 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3182                                 my_size = sizeof(dest.user32_sb64);
3183                                 sbp = (caddr_t)&dest.user32_sb64;
3184                         }
3185                 } else {
3186                         source.sb.st_lspare = 0;
3187                         source.sb.st_qspare[0] = 0LL;
3188                         source.sb.st_qspare[1] = 0LL;
3189                         if (IS_64BIT_PROCESS(current_proc())) {
3190                                 munge_user64_stat(&source.sb, &dest.user64_sb);
3191                                 my_size = sizeof(dest.user64_sb);
3192                                 sbp = (caddr_t)&dest.user64_sb;
3193                         } else {
3194                                 munge_user32_stat(&source.sb, &dest.user32_sb);
3195                                 my_size = sizeof(dest.user32_sb);
3196                                 sbp = (caddr_t)&dest.user32_sb;
3197                         }
3198                 }
3199
3200                 error = copyout(sbp, ub, my_size);
3201         }
3202
3203         /* caller wants extended security information? */
3204         if (xsecurity != USER_ADDR_NULL) {
3205                 /* did we get any? */
3206                 if (fsec == KAUTH_FILESEC_NONE) {
3207                         if (susize(xsecurity_size, 0) != 0) {
3208                                 error = EFAULT;
3209                                 goto out;
3210                         }
3211                 } else {
3212                         /* find the user buffer size */
3213                         xsecurity_bufsize = fusize(xsecurity_size);
3214
3215                         /* copy out the actual data size */
3216                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3217                                 error = EFAULT;
3218                                 goto out;
3219                         }
3220
3221                         /* if the caller supplied enough room, copy out to it */
3222                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
3223                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3224                         }
3225                 }
3226         }
3227 out:
3228         fp_drop(p, fd, fp, 0);
3229         if (fsec != NULL) {
3230                 kauth_filesec_free(fsec);
3231         }
3232         return error;
3233 }
3234
3235
3236 /*
3237  * fstat_extended
3238  *
3239  * Description: Extended version of fstat supporting returning extended
3240  *              security information
3241  *
3242  * Parameters:  p                               The process doing the fstat
3243  *              uap->fd                         The fd to stat
3244  *              uap->ub                         The user stat buffer
3245  *              uap->xsecurity                  The user extended security
3246  *                                              buffer, or 0 if none
3247  *              uap->xsecurity_size             The size of xsecurity, or 0
3248  *
3249  * Returns:     0                               Success
3250  *              !0                              Errno (see fstat1)
3251  */
3252 int
3253 fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3254 {
3255         return fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3256 }
3257
3258
3259 /*
3260  * fstat
3261  *
3262  * Description: Get file status for the file associated with fd
3263  *
3264  * Parameters:  p                               The process doing the fstat
3265  *              uap->fd                         The fd to stat
3266  *              uap->ub                         The user stat buffer
3267  *
3268  * Returns:     0                               Success
3269  *              !0                              Errno (see fstat1)
3270  */
3271 int
3272 fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3273 {
3274         return fstat1(p, uap->fd, uap->ub, 0, 0, 0);
3275 }
3276
3277
3278 /*
3279  * fstat64_extended
3280  *
3281  * Description: Extended version of fstat64 supporting returning extended
3282  *              security information
3283  *
3284  * Parameters:  p                               The process doing the fstat
3285  *              uap->fd                         The fd to stat
3286  *              uap->ub                         The user stat buffer
3287  *              uap->xsecurity                  The user extended security
3288  *                                              buffer, or 0 if none
3289  *              uap->xsecurity_size             The size of xsecurity, or 0
3290  *
3291  * Returns:     0                               Success
3292  *              !0                              Errno (see fstat1)
3293  */
3294 int
3295 fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3296 {
3297         return fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3298 }
3299
3300
3301 /*
3302  * fstat64
3303  *
3304  * Description: Get 64 bit version of the file status for the file associated
3305  *              with fd
3306  *
3307  * Parameters:  p                               The process doing the fstat
3308  *              uap->fd                         The fd to stat
3309  *              uap->ub                         The user stat buffer
3310  *
3311  * Returns:     0                               Success
3312  *              !0                              Errno (see fstat1)
3313  */
3314 int
3315 fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3316 {
3317         return fstat1(p, uap->fd, uap->ub, 0, 0, 1);
3318 }
3319
3320
3321 /*
3322  * fpathconf
3323  *
3324  * Description: Return pathconf information about a file descriptor.
3325  *
3326  * Parameters:  p                               Process making the request
3327  *              uap->fd                         fd to get information about
3328  *              uap->name                       Name of information desired
3329  *              retval                          Pointer to the call return area
3330  *
3331  * Returns:     0                               Success
3332  *              EINVAL
3333  *      fp_lookup:EBADF                         Bad file descriptor
3334  *      vnode_getwithref:???
3335  *      vn_pathconf:???
3336  *
3337  * Implicit returns:
3338  *              *retval (modified)              Returned information (numeric)
3339  */
3340 int
3341 fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3342 {
3343         int fd = uap->fd;
3344         struct fileproc *fp;
3345         struct vnode *vp;
3346         int error = 0;
3347         file_type_t type;
3348         caddr_t data;
3349
3350
3351         AUDIT_ARG(fd, uap->fd);
3352         if ((error = fp_lookup(p, fd, &fp, 0))) {
3353                 return error;
3354         }
3355         type = fp->f_type;
3356         data = fp->f_data;
3357
3358         switch (type) {
3359         case DTYPE_SOCKET:
3360                 if (uap->name != _PC_PIPE_BUF) {
3361                         error = EINVAL;
3362                         goto out;
3363                 }
3364                 *retval = PIPE_BUF;
3365                 error = 0;
3366                 goto out;
3367
3368         case DTYPE_PIPE:
3369                 if (uap->name != _PC_PIPE_BUF) {
3370                         error = EINVAL;
3371                         goto out;
3372                 }
3373                 *retval = PIPE_BUF;
3374                 error = 0;
3375                 goto out;
3376
3377         case DTYPE_VNODE:
3378                 vp = (struct vnode *)data;
3379
3380                 if ((error = vnode_getwithref(vp)) == 0) {
3381                         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3382
3383                         error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3384
3385                         (void)vnode_put(vp);
3386                 }
3387                 goto out;
3388
3389         default:
3390                 error = EINVAL;
3391                 goto out;
3392         }
3393         /*NOTREACHED*/
3394 out:
3395         fp_drop(p, fd, fp, 0);
3396         return error;
3397 }
3398
3399 /*
3400  * Statistics counter for the number of times a process calling fdalloc()
3401  * has resulted in an expansion of the per process open file table.
3402  *
3403  * XXX This would likely be of more use if it were per process
3404  */
3405 int fdexpand;
3406
3407
3408 /*
3409  * fdalloc
3410  *
3411  * Description: Allocate a file descriptor for the process.
3412  *
3413  * Parameters:  p                               Process to allocate the fd in
3414  *              want                            The fd we would prefer to get
3415  *              result                          Pointer to fd we got
3416  *
3417  * Returns:     0                               Success
3418  *              EMFILE
3419  *              ENOMEM
3420  *
3421  * Implicit returns:
3422  *              *result (modified)              The fd which was allocated
3423  */
3424 int
3425 fdalloc(proc_t p, int want, int *result)
3426 {
3427         struct filedesc *fdp = p->p_fd;
3428         int i;
3429         int lim, last, numfiles, oldnfiles;
3430         struct fileproc **newofiles, **ofiles;
3431         char *newofileflags;
3432
3433         /*
3434          * Search for a free descriptor starting at the higher
3435          * of want or fd_freefile.  If that fails, consider
3436          * expanding the ofile array.
3437          */
3438 #if DIAGNOSTIC
3439         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3440 #endif
3441
3442         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3443         for (;;) {
3444                 last = min(fdp->fd_nfiles, lim);
3445                 if ((i = want) < fdp->fd_freefile) {
3446                         i = fdp->fd_freefile;
3447                 }
3448                 for (; i < last; i++) {
3449                         if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3450                                 procfdtbl_reservefd(p, i);
3451                                 if (i > fdp->fd_lastfile) {
3452                                         fdp->fd_lastfile = i;
3453                                 }
3454                                 if (want <= fdp->fd_freefile) {
3455                                         fdp->fd_freefile = i;
3456                                 }
3457                                 *result = i;
3458                                 return 0;
3459                         }
3460                 }
3461
3462                 /*
3463                  * No space in current array.  Expand?
3464                  */
3465                 if (fdp->fd_nfiles >= lim) {
3466                         return EMFILE;
3467                 }
3468                 if (fdp->fd_nfiles < NDEXTENT) {
3469                         numfiles = NDEXTENT;
3470                 } else {
3471                         numfiles = 2 * fdp->fd_nfiles;
3472                 }
3473                 /* Enforce lim */
3474                 if (numfiles > lim) {
3475                         numfiles = lim;
3476                 }
3477                 proc_fdunlock(p);
3478                 MALLOC_ZONE(newofiles, struct fileproc **,
3479                     numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3480                 proc_fdlock(p);
3481                 if (newofiles == NULL) {
3482                         return ENOMEM;
3483                 }
3484                 if (fdp->fd_nfiles >= numfiles) {
3485                         FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
3486                         continue;
3487                 }
3488                 newofileflags = (char *) &newofiles[numfiles];
3489                 /*
3490                  * Copy the existing ofile and ofileflags arrays
3491                  * and zero the new portion of each array.
3492                  */
3493                 oldnfiles = fdp->fd_nfiles;
3494                 (void) memcpy(newofiles, fdp->fd_ofiles,
3495                     oldnfiles * sizeof(*fdp->fd_ofiles));
3496                 (void) memset(&newofiles[oldnfiles], 0,
3497                     (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3498
3499                 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3500                     oldnfiles * sizeof(*fdp->fd_ofileflags));
3501                 (void) memset(&newofileflags[oldnfiles], 0,
3502                     (numfiles - oldnfiles) *
3503                     sizeof(*fdp->fd_ofileflags));
3504                 ofiles = fdp->fd_ofiles;
3505                 fdp->fd_ofiles = newofiles;
3506                 fdp->fd_ofileflags = newofileflags;
3507                 fdp->fd_nfiles = numfiles;
3508                 FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
3509                 fdexpand++;
3510         }
3511 }
3512
3513
3514 /*
3515  * fdavail
3516  *
3517  * Description: Check to see whether n user file descriptors are available
3518  *              to the process p.
3519  *
3520  * Parameters:  p                               Process to check in
3521  *              n                               The number of fd's desired
3522  *
3523  * Returns:     0                               No
3524  *              1                               Yes
3525  *
3526  * Locks:       Assumes proc_fdlock for process is held by the caller
3527  *
3528  * Notes:       The answer only remains valid so long as the proc_fdlock is
3529  *              held by the caller.
3530  */
3531 int
3532 fdavail(proc_t p, int n)
3533 {
3534         struct filedesc *fdp = p->p_fd;
3535         struct fileproc **fpp;
3536         char *flags;
3537         int i, lim;
3538
3539         lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
3540         if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
3541                 return 1;
3542         }
3543         fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3544         flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3545         for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
3546                 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
3547                         return 1;
3548                 }
3549         }
3550         return 0;
3551 }
3552
3553
3554 /*
3555  * fdrelse
3556  *
3557  * Description: Legacy KPI wrapper function for _fdrelse
3558  *
3559  * Parameters:  p                               Process in which fd lives
3560  *              fd                              fd to free
3561  *
3562  * Returns:     void
3563  *
3564  * Locks:       Assumes proc_fdlock for process is held by the caller
3565  */
3566 void
3567 fdrelse(proc_t p, int fd)
3568 {
3569         _fdrelse(p, fd);
3570 }
3571
3572
3573 /*
3574  * fdgetf_noref
3575  *
3576  * Description: Get the fileproc pointer for the given fd from the per process
3577  *              open file table without taking an explicit reference on it.
3578  *
3579  * Parameters:  p                               Process containing fd
3580  *              fd                              fd to obtain fileproc for
3581  *              resultfp                        Pointer to pointer return area
3582  *
3583  * Returns:     0                               Success
3584  *              EBADF
3585  *
3586  * Implicit returns:
3587  *              *resultfp (modified)            Pointer to fileproc pointer
3588  *
3589  * Locks:       Assumes proc_fdlock for process is held by the caller
3590  *
3591  * Notes:       Because there is no reference explicitly taken, the returned
3592  *              fileproc pointer is only valid so long as the proc_fdlock
3593  *              remains held by the caller.
3594  */
3595 int
3596 fdgetf_noref(proc_t p, int fd, struct fileproc **resultfp)
3597 {
3598         struct filedesc *fdp = p->p_fd;
3599         struct fileproc *fp;
3600
3601         if (fd < 0 || fd >= fdp->fd_nfiles ||
3602             (fp = fdp->fd_ofiles[fd]) == NULL ||
3603             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3604                 return EBADF;
3605         }
3606         if (resultfp) {
3607                 *resultfp = fp;
3608         }
3609         return 0;
3610 }
3611
3612
3613 /*
3614  * fp_getfvp
3615  *
3616  * Description: Get fileproc and vnode pointer for a given fd from the per
3617  *              process open file table of the specified process, and if
3618  *              successful, increment the f_iocount
3619  *
3620  * Parameters:  p                               Process in which fd lives
3621  *              fd                              fd to get information for
3622  *              resultfp                        Pointer to result fileproc
3623  *                                              pointer area, or 0 if none
3624  *              resultvp                        Pointer to result vnode pointer
3625  *                                              area, or 0 if none
3626  *
3627  * Returns:     0                               Success
3628  *              EBADF                           Bad file descriptor
3629  *              ENOTSUP                         fd does not refer to a vnode
3630  *
3631  * Implicit returns:
3632  *              *resultfp (modified)            Fileproc pointer
3633  *              *resultvp (modified)            vnode pointer
3634  *
3635  * Notes:       The resultfp and resultvp fields are optional, and may be
3636  *              independently specified as NULL to skip returning information
3637  *
3638  * Locks:       Internally takes and releases proc_fdlock
3639  */
3640 int
3641 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
3642 {
3643         struct filedesc *fdp = p->p_fd;
3644         struct fileproc *fp;
3645
3646         proc_fdlock_spin(p);
3647         if (fd < 0 || fd >= fdp->fd_nfiles ||
3648             (fp = fdp->fd_ofiles[fd]) == NULL ||
3649             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3650                 proc_fdunlock(p);
3651                 return EBADF;
3652         }
3653         if (fp->f_type != DTYPE_VNODE) {
3654                 proc_fdunlock(p);
3655                 return ENOTSUP;
3656         }
3657         os_ref_retain_locked(&fp->f_iocount);
3658
3659         if (resultfp) {
3660                 *resultfp = fp;
3661         }
3662         if (resultvp) {
3663                 *resultvp = (struct vnode *)fp->f_data;
3664         }
3665         proc_fdunlock(p);
3666
3667         return 0;
3668 }
3669
3670
3671 /*
3672  * fp_getfvpandvid
3673  *
3674  * Description: Get fileproc, vnode pointer, and vid for a given fd from the
3675  *              per process open file table of the specified process, and if
3676  *              successful, increment the f_iocount
3677  *
3678  * Parameters:  p                               Process in which fd lives
3679  *              fd                              fd to get information for
3680  *              resultfp                        Pointer to result fileproc
3681  *                                              pointer area, or 0 if none
3682  *              resultvp                        Pointer to result vnode pointer
3683  *                                              area, or 0 if none
3684  *              vidp                            Pointer to resuld vid area
3685  *
3686  * Returns:     0                               Success
3687  *              EBADF                           Bad file descriptor
3688  *              ENOTSUP                         fd does not refer to a vnode
3689  *
3690  * Implicit returns:
3691  *              *resultfp (modified)            Fileproc pointer
3692  *              *resultvp (modified)            vnode pointer
3693  *              *vidp                           vid value
3694  *
3695  * Notes:       The resultfp and resultvp fields are optional, and may be
3696  *              independently specified as NULL to skip returning information
3697  *
3698  * Locks:       Internally takes and releases proc_fdlock
3699  */
3700 int
3701 fp_getfvpandvid(proc_t p, int fd, struct fileproc **resultfp,
3702     struct vnode **resultvp, uint32_t *vidp)
3703 {
3704         struct filedesc *fdp = p->p_fd;
3705         struct fileproc *fp;
3706
3707         proc_fdlock_spin(p);
3708         if (fd < 0 || fd >= fdp->fd_nfiles ||
3709             (fp = fdp->fd_ofiles[fd]) == NULL ||
3710             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3711                 proc_fdunlock(p);
3712                 return EBADF;
3713         }
3714         if (fp->f_type != DTYPE_VNODE) {
3715                 proc_fdunlock(p);
3716                 return ENOTSUP;
3717         }
3718         os_ref_retain_locked(&fp->f_iocount);
3719
3720         if (resultfp) {
3721                 *resultfp = fp;
3722         }
3723         if (resultvp) {
3724                 *resultvp = (struct vnode *)fp->f_data;
3725         }
3726         if (vidp) {
3727                 *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data);
3728         }
3729         proc_fdunlock(p);
3730
3731         return 0;
3732 }
3733
3734
3735 /*
3736  * fp_getfsock
3737  *
3738  * Description: Get fileproc and socket pointer for a given fd from the
3739  *              per process open file table of the specified process, and if
3740  *              successful, increment the f_iocount
3741  *
3742  * Parameters:  p                               Process in which fd lives
3743  *              fd                              fd to get information for
3744  *              resultfp                        Pointer to result fileproc
3745  *                                              pointer area, or 0 if none
3746  *              results                         Pointer to result socket
3747  *                                              pointer area, or 0 if none
3748  *
3749  * Returns:     EBADF                   The file descriptor is invalid
3750  *              EOPNOTSUPP              The file descriptor is not a socket
3751  *              0                       Success
3752  *
3753  * Implicit returns:
3754  *              *resultfp (modified)            Fileproc pointer
3755  *              *results (modified)             socket pointer
3756  *
3757  * Notes:       EOPNOTSUPP should probably be ENOTSOCK; this function is only
3758  *              ever called from accept1().
3759  */
3760 int
3761 fp_getfsock(proc_t p, int fd, struct fileproc **resultfp,
3762     struct socket **results)
3763 {
3764         struct filedesc *fdp = p->p_fd;
3765         struct fileproc *fp;
3766
3767         proc_fdlock_spin(p);
3768         if (fd < 0 || fd >= fdp->fd_nfiles ||
3769             (fp = fdp->fd_ofiles[fd]) == NULL ||
3770             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3771                 proc_fdunlock(p);
3772                 return EBADF;
3773         }
3774         if (fp->f_type != DTYPE_SOCKET) {
3775                 proc_fdunlock(p);
3776                 return EOPNOTSUPP;
3777         }
3778         os_ref_retain_locked(&fp->f_iocount);
3779
3780         if (resultfp) {
3781                 *resultfp = fp;
3782         }
3783         if (results) {
3784                 *results = (struct socket *)fp->f_data;
3785         }
3786         proc_fdunlock(p);
3787
3788         return 0;
3789 }
3790
3791
3792 /*
3793  * fp_getfkq
3794  *
3795  * Description: Get fileproc and kqueue pointer for a given fd from the
3796  *              per process open file table of the specified process, and if
3797  *              successful, increment the f_iocount
3798  *
3799  * Parameters:  p                               Process in which fd lives
3800  *              fd                              fd to get information for
3801  *              resultfp                        Pointer to result fileproc
3802  *                                              pointer area, or 0 if none
3803  *              resultkq                        Pointer to result kqueue
3804  *                                              pointer area, or 0 if none
3805  *
3806  * Returns:     EBADF                   The file descriptor is invalid
3807  *              EBADF                   The file descriptor is not a socket
3808  *              0                       Success
3809  *
3810  * Implicit returns:
3811  *              *resultfp (modified)            Fileproc pointer
3812  *              *resultkq (modified)            kqueue pointer
3813  *
3814  * Notes:       The second EBADF should probably be something else to make
3815  *              the error condition distinct.
3816  */
3817 int
3818 fp_getfkq(proc_t p, int fd, struct fileproc **resultfp,
3819     struct kqueue **resultkq)
3820 {
3821         struct filedesc *fdp = p->p_fd;
3822         struct fileproc *fp;
3823
3824         proc_fdlock_spin(p);
3825         if (fd < 0 || fd >= fdp->fd_nfiles ||
3826             (fp = fdp->fd_ofiles[fd]) == NULL ||
3827             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3828                 proc_fdunlock(p);
3829                 return EBADF;
3830         }
3831         if (fp->f_type != DTYPE_KQUEUE) {
3832                 proc_fdunlock(p);
3833                 return EBADF;
3834         }
3835         os_ref_retain_locked(&fp->f_iocount);
3836
3837         if (resultfp) {
3838                 *resultfp = fp;
3839         }
3840         if (resultkq) {
3841                 *resultkq = (struct kqueue *)fp->f_data;
3842         }
3843         proc_fdunlock(p);
3844
3845         return 0;
3846 }
3847
3848
3849 /*
3850  * fp_getfpshm
3851  *
3852  * Description: Get fileproc and POSIX shared memory pointer for a given fd
3853  *              from the per process open file table of the specified process
3854  *              and if successful, increment the f_iocount
3855  *
3856  * Parameters:  p                               Process in which fd lives
3857  *              fd                              fd to get information for
3858  *              resultfp                        Pointer to result fileproc
3859  *                                              pointer area, or 0 if none
3860  *              resultpshm                      Pointer to result POSIX
3861  *                                              shared memory pointer
3862  *                                              pointer area, or 0 if none
3863  *
3864  * Returns:     EBADF                   The file descriptor is invalid
3865  *              EBADF                   The file descriptor is not a POSIX
3866  *                                      shared memory area
3867  *              0                       Success
3868  *
3869  * Implicit returns:
3870  *              *resultfp (modified)            Fileproc pointer
3871  *              *resultpshm (modified)          POSIX shared memory pointer
3872  *
3873  * Notes:       The second EBADF should probably be something else to make
3874  *              the error condition distinct.
3875  */
3876 int
3877 fp_getfpshm(proc_t p, int fd, struct fileproc **resultfp,
3878     struct pshmnode **resultpshm)
3879 {
3880         struct filedesc *fdp = p->p_fd;
3881         struct fileproc *fp;
3882
3883         proc_fdlock_spin(p);
3884         if (fd < 0 || fd >= fdp->fd_nfiles ||
3885             (fp = fdp->fd_ofiles[fd]) == NULL ||
3886             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3887                 proc_fdunlock(p);
3888                 return EBADF;
3889         }
3890         if (fp->f_type != DTYPE_PSXSHM) {
3891                 proc_fdunlock(p);
3892                 return EBADF;
3893         }
3894         os_ref_retain_locked(&fp->f_iocount);
3895
3896         if (resultfp) {
3897                 *resultfp = fp;
3898         }
3899         if (resultpshm) {
3900                 *resultpshm = (struct pshmnode *)fp->f_data;
3901         }
3902         proc_fdunlock(p);
3903
3904         return 0;
3905 }
3906
3907
3908 /*
3909  * fp_getfsem
3910  *
3911  * Description: Get fileproc and POSIX semaphore pointer for a given fd from
3912  *              the per process open file table of the specified process
3913  *              and if successful, increment the f_iocount
3914  *
3915  * Parameters:  p                               Process in which fd lives
3916  *              fd                              fd to get information for
3917  *              resultfp                        Pointer to result fileproc
3918  *                                              pointer area, or 0 if none
3919  *              resultpsem                      Pointer to result POSIX
3920  *                                              semaphore pointer area, or
3921  *                                              0 if none
3922  *
3923  * Returns:     EBADF                   The file descriptor is invalid
3924  *              EBADF                   The file descriptor is not a POSIX
3925  *                                      semaphore
3926  *              0                       Success
3927  *
3928  * Implicit returns:
3929  *              *resultfp (modified)            Fileproc pointer
3930  *              *resultpsem (modified)          POSIX semaphore pointer
3931  *
3932  * Notes:       The second EBADF should probably be something else to make
3933  *              the error condition distinct.
3934  *
3935  *              In order to support unnamed POSIX semaphores, the named
3936  *              POSIX semaphores will have to move out of the per-process
3937  *              open filetable, and into a global table that is shared with
3938  *              unnamed POSIX semaphores, since unnamed POSIX semaphores
3939  *              are typically used by declaring instances in shared memory,
3940  *              and there's no other way to do this without changing the
3941  *              underlying type, which would introduce binary compatibility
3942  *              issues.
3943  */
3944 int
3945 fp_getfpsem(proc_t p, int fd, struct fileproc **resultfp,
3946     struct psemnode **resultpsem)
3947 {
3948         struct filedesc *fdp = p->p_fd;
3949         struct fileproc *fp;
3950
3951         proc_fdlock_spin(p);
3952         if (fd < 0 || fd >= fdp->fd_nfiles ||
3953             (fp = fdp->fd_ofiles[fd]) == NULL ||
3954             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3955                 proc_fdunlock(p);
3956                 return EBADF;
3957         }
3958         if (fp->f_type != DTYPE_PSXSEM) {
3959                 proc_fdunlock(p);
3960                 return EBADF;
3961         }
3962         os_ref_retain_locked(&fp->f_iocount);
3963
3964         if (resultfp) {
3965                 *resultfp = fp;
3966         }
3967         if (resultpsem) {
3968                 *resultpsem = (struct psemnode *)fp->f_data;
3969         }
3970         proc_fdunlock(p);
3971
3972         return 0;
3973 }
3974
3975
3976 /*
3977  * fp_getfpipe
3978  *
3979  * Description: Get fileproc and pipe pointer for a given fd from the
3980  *              per process open file table of the specified process
3981  *              and if successful, increment the f_iocount
3982  *
3983  * Parameters:  p                               Process in which fd lives
3984  *              fd                              fd to get information for
3985  *              resultfp                        Pointer to result fileproc
3986  *                                              pointer area, or 0 if none
3987  *              resultpipe                      Pointer to result pipe
3988  *                                              pointer area, or 0 if none
3989  *
3990  * Returns:     EBADF                   The file descriptor is invalid
3991  *              EBADF                   The file descriptor is not a socket
3992  *              0                       Success
3993  *
3994  * Implicit returns:
3995  *              *resultfp (modified)            Fileproc pointer
3996  *              *resultpipe (modified)          pipe pointer
3997  *
3998  * Notes:       The second EBADF should probably be something else to make
3999  *              the error condition distinct.
4000  */
4001 int
4002 fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp,
4003     struct pipe **resultpipe)
4004 {
4005         struct filedesc *fdp = p->p_fd;
4006         struct fileproc *fp;
4007
4008         proc_fdlock_spin(p);
4009         if (fd < 0 || fd >= fdp->fd_nfiles ||
4010             (fp = fdp->fd_ofiles[fd]) == NULL ||
4011             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4012                 proc_fdunlock(p);
4013                 return EBADF;
4014         }
4015         if (fp->f_type != DTYPE_PIPE) {
4016                 proc_fdunlock(p);
4017                 return EBADF;
4018         }
4019         os_ref_retain_locked(&fp->f_iocount);
4020
4021         if (resultfp) {
4022                 *resultfp = fp;
4023         }
4024         if (resultpipe) {
4025                 *resultpipe = (struct pipe *)fp->f_data;
4026         }
4027         proc_fdunlock(p);
4028
4029         return 0;
4030 }
4031
4032
4033 /*
4034  * fp_lookup
4035  *
4036  * Description: Get fileproc pointer for a given fd from the per process
4037  *              open file table of the specified process and if successful,
4038  *              increment the f_iocount
4039  *
4040  * Parameters:  p                               Process in which fd lives
4041  *              fd                              fd to get information for
4042  *              resultfp                        Pointer to result fileproc
4043  *                                              pointer area, or 0 if none
4044  *              locked                          !0 if the caller holds the
4045  *                                              proc_fdlock, 0 otherwise
4046  *
4047  * Returns:     0                       Success
4048  *              EBADF                   Bad file descriptor
4049  *
4050  * Implicit returns:
4051  *              *resultfp (modified)            Fileproc pointer
4052  *
4053  * Locks:       If the argument 'locked' is non-zero, then the caller is
4054  *              expected to have taken and held the proc_fdlock; if it is
4055  *              zero, than this routine internally takes and drops this lock.
4056  */
4057 int
4058 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
4059 {
4060         struct filedesc *fdp = p->p_fd;
4061         struct fileproc *fp;
4062
4063         if (!locked) {
4064                 proc_fdlock_spin(p);
4065         }
4066         if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
4067             (fp = fdp->fd_ofiles[fd]) == NULL ||
4068             (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4069                 if (!locked) {
4070                         proc_fdunlock(p);
4071                 }
4072                 return EBADF;
4073         }
4074         os_ref_retain_locked(&fp->f_iocount);
4075
4076         if (resultfp) {
4077                 *resultfp = fp;
4078         }
4079         if (!locked) {
4080                 proc_fdunlock(p);
4081         }
4082
4083         return 0;
4084 }
4085
4086
4087 /*
4088  * fp_tryswap
4089  *
4090  * Description: Swap the fileproc pointer for a given fd with a new
4091  *              fileproc pointer in the per-process open file table of
4092  *              the specified process.  The fdlock must be held at entry.
4093  *              Iff the swap is successful, the old fileproc pointer is freed.
4094  *
4095  * Parameters:  p               Process containing the fd
4096  *              fd              The fd of interest
4097  *              nfp             Pointer to the newfp
4098  *
4099  * Returns:     0               Success
4100  *              EBADF           Bad file descriptor
4101  *              EINTR           Interrupted
4102  *              EKEEPLOOKING    Other references were active, try again.
4103  */
4104 int
4105 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
4106 {
4107         struct fileproc *fp;
4108         int error;
4109
4110         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
4111
4112         if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
4113                 return error;
4114         }
4115         /*
4116          * At this point, our caller (change_guardedfd_np) has
4117          * one f_iocount reference, and we just took another
4118          * one to begin the replacement.
4119          * fp and nfp have a +1 reference from allocation.
4120          * Thus if no-one else is looking, f_iocount should be 3.
4121          */
4122         if (os_ref_get_count(&fp->f_iocount) < 3 ||
4123             1 != os_ref_get_count(&nfp->f_iocount)) {
4124                 panic("%s: f_iocount", __func__);
4125         } else if (3 == os_ref_get_count(&fp->f_iocount)) {
4126                 /* Copy the contents of *fp, preserving the "type" of *nfp */
4127
4128                 nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) |
4129                     (fp->f_flags & ~FP_TYPEMASK);
4130                 os_ref_retain_locked(&nfp->f_iocount);
4131                 os_ref_retain_locked(&nfp->f_iocount);
4132                 nfp->f_fglob = fp->f_fglob;
4133                 nfp->f_wset = fp->f_wset;
4134
4135                 p->p_fd->fd_ofiles[fd] = nfp;
4136                 fp_drop(p, fd, nfp, 1);
4137
4138                 os_ref_release_live(&fp->f_iocount);
4139                 os_ref_release_live(&fp->f_iocount);
4140                 fileproc_free(fp);
4141         } else {
4142                 /*
4143                  * Wait for all other active references to evaporate.
4144                  */
4145                 p->p_fpdrainwait = 1;
4146                 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
4147                     PRIBIO | PCATCH, "tryswap fpdrain", NULL);
4148                 if (0 == error) {
4149                         /*
4150                          * Return an "internal" errno to trigger a full
4151                          * reevaluation of the change-guard attempt.
4152                          */
4153                         error = EKEEPLOOKING;
4154                 }
4155                 (void) fp_drop(p, fd, fp, 1);
4156         }
4157         return error;
4158 }
4159
4160
4161 /*
4162  * fp_drop_written
4163  *
4164  * Description: Set the FP_WRITTEN flag on the fileproc and drop the I/O
4165  *              reference previously taken by calling fp_lookup et. al.
4166  *
4167  * Parameters:  p                               Process in which the fd lives
4168  *              fd                              fd associated with the fileproc
4169  *              fp                              fileproc on which to set the
4170  *                                              flag and drop the reference
4171  *
4172  * Returns:     0                               Success
4173  *      fp_drop:EBADF                           Bad file descriptor
4174  *
4175  * Locks:       This function internally takes and drops the proc_fdlock for
4176  *              the supplied process
4177  *
4178  * Notes:       The fileproc must correspond to the fd in the supplied proc
4179  */
4180 int
4181 fp_drop_written(proc_t p, int fd, struct fileproc *fp)
4182 {
4183         int error;
4184
4185         proc_fdlock_spin(p);
4186
4187         fp->f_flags |= FP_WRITTEN;
4188
4189         error = fp_drop(p, fd, fp, 1);
4190
4191         proc_fdunlock(p);
4192
4193         return error;
4194 }
4195
4196
4197 /*
4198  * fp_drop_event
4199  *
4200  * Description: Set the FP_WAITEVENT flag on the fileproc and drop the I/O
4201  *              reference previously taken by calling fp_lookup et. al.
4202  *
4203  * Parameters:  p                               Process in which the fd lives
4204  *              fd                              fd associated with the fileproc
4205  *              fp                              fileproc on which to set the
4206  *                                              flag and drop the reference
4207  *
4208  * Returns:     0                               Success
4209  *      fp_drop:EBADF                           Bad file descriptor
4210  *
4211  * Locks:       This function internally takes and drops the proc_fdlock for
4212  *              the supplied process
4213  *
4214  * Notes:       The fileproc must correspond to the fd in the supplied proc
4215  */
4216 int
4217 fp_drop_event(proc_t p, int fd, struct fileproc *fp)
4218 {
4219         int error;
4220
4221         proc_fdlock_spin(p);
4222
4223         fp->f_flags |= FP_WAITEVENT;
4224
4225         error = fp_drop(p, fd, fp, 1);
4226
4227         proc_fdunlock(p);
4228
4229         return error;
4230 }
4231
4232
4233 /*
4234  * fp_drop
4235  *
4236  * Description: Drop the I/O reference previously taken by calling fp_lookup
4237  *              et. al.
4238  *
4239  * Parameters:  p                               Process in which the fd lives
4240  *              fd                              fd associated with the fileproc
4241  *              fp                              fileproc on which to set the
4242  *                                              flag and drop the reference
4243  *              locked                          flag to internally take and
4244  *                                              drop proc_fdlock if it is not
4245  *                                              already held by the caller
4246  *
4247  * Returns:     0                               Success
4248  *              EBADF                           Bad file descriptor
4249  *
4250  * Locks:       This function internally takes and drops the proc_fdlock for
4251  *              the supplied process if 'locked' is non-zero, and assumes that
4252  *              the caller already holds this lock if 'locked' is non-zero.
4253  *
4254  * Notes:       The fileproc must correspond to the fd in the supplied proc
4255  */
4256 int
4257 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4258 {
4259         struct filedesc *fdp = p->p_fd;
4260         int     needwakeup = 0;
4261
4262         if (!locked) {
4263                 proc_fdlock_spin(p);
4264         }
4265         if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4266             (fp = fdp->fd_ofiles[fd]) == NULL ||
4267             ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4268             !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4269                 if (!locked) {
4270                         proc_fdunlock(p);
4271                 }
4272                 return EBADF;
4273         }
4274
4275         if (1 == os_ref_release_locked(&fp->f_iocount)) {
4276                 if (fp->f_flags & FP_SELCONFLICT) {
4277                         fp->f_flags &= ~FP_SELCONFLICT;
4278                 }
4279
4280                 if (p->p_fpdrainwait) {
4281                         p->p_fpdrainwait = 0;
4282                         needwakeup = 1;
4283                 }
4284         }
4285         if (!locked) {
4286                 proc_fdunlock(p);
4287         }
4288         if (needwakeup) {
4289                 wakeup(&p->p_fpdrainwait);
4290         }
4291
4292         return 0;
4293 }
4294
4295
4296 /*
4297  * file_vnode
4298  *
4299  * Description: Given an fd, look it up in the current process's per process
4300  *              open file table, and return its internal vnode pointer.
4301  *
4302  * Parameters:  fd                              fd to obtain vnode from
4303  *              vpp                             pointer to vnode return area
4304  *
4305  * Returns:     0                               Success
4306  *              EINVAL                          The fd does not refer to a
4307  *                                              vnode fileproc entry
4308  *      fp_lookup:EBADF                         Bad file descriptor
4309  *
4310  * Implicit returns:
4311  *              *vpp (modified)                 Returned vnode pointer
4312  *
4313  * Locks:       This function internally takes and drops the proc_fdlock for
4314  *              the current process
4315  *
4316  * Notes:       If successful, this function increments the f_iocount on the
4317  *              fd's corresponding fileproc.
4318  *
4319  *              The fileproc referenced is not returned; because of this, care
4320  *              must be taken to not drop the last reference (e.g. by closing
4321  *              the file).  This is inherently unsafe, since the reference may
4322  *              not be recoverable from the vnode, if there is a subsequent
4323  *              close that destroys the associate fileproc.  The caller should
4324  *              therefore retain their own reference on the fileproc so that
4325  *              the f_iocount can be dropped subsequently.  Failure to do this
4326  *              can result in the returned pointer immediately becoming invalid
4327  *              following the call.
4328  *
4329  *              Use of this function is discouraged.
4330  */
4331 int
4332 file_vnode(int fd, struct vnode **vpp)
4333 {
4334         proc_t p = current_proc();
4335         struct fileproc *fp;
4336         int error;
4337
4338         proc_fdlock_spin(p);
4339         if ((error = fp_lookup(p, fd, &fp, 1))) {
4340                 proc_fdunlock(p);
4341                 return error;
4342         }
4343         if (fp->f_type != DTYPE_VNODE) {
4344                 fp_drop(p, fd, fp, 1);
4345                 proc_fdunlock(p);
4346                 return EINVAL;
4347         }
4348         if (vpp != NULL) {
4349                 *vpp = (struct vnode *)fp->f_data;
4350         }
4351         proc_fdunlock(p);
4352
4353         return 0;
4354 }
4355
4356
4357 /*
4358  * file_vnode_withvid
4359  *
4360  * Description: Given an fd, look it up in the current process's per process
4361  *              open file table, and return its internal vnode pointer.
4362  *
4363  * Parameters:  fd                              fd to obtain vnode from
4364  *              vpp                             pointer to vnode return area
4365  *              vidp                            pointer to vid of the returned vnode
4366  *
4367  * Returns:     0                               Success
4368  *              EINVAL                          The fd does not refer to a
4369  *                                              vnode fileproc entry
4370  *      fp_lookup:EBADF                         Bad file descriptor
4371  *
4372  * Implicit returns:
4373  *              *vpp (modified)                 Returned vnode pointer
4374  *
4375  * Locks:       This function internally takes and drops the proc_fdlock for
4376  *              the current process
4377  *
4378  * Notes:       If successful, this function increments the f_iocount on the
4379  *              fd's corresponding fileproc.
4380  *
4381  *              The fileproc referenced is not returned; because of this, care
4382  *              must be taken to not drop the last reference (e.g. by closing
4383  *              the file).  This is inherently unsafe, since the reference may
4384  *              not be recoverable from the vnode, if there is a subsequent
4385  *              close that destroys the associate fileproc.  The caller should
4386  *              therefore retain their own reference on the fileproc so that
4387  *              the f_iocount can be dropped subsequently.  Failure to do this
4388  *              can result in the returned pointer immediately becoming invalid
4389  *              following the call.
4390  *
4391  *              Use of this function is discouraged.
4392  */
4393 int
4394 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
4395 {
4396         proc_t p = current_proc();
4397         struct fileproc *fp;
4398         vnode_t vp;
4399         int error;
4400
4401         proc_fdlock_spin(p);
4402         if ((error = fp_lookup(p, fd, &fp, 1))) {
4403                 proc_fdunlock(p);
4404                 return error;
4405         }
4406         if (fp->f_type != DTYPE_VNODE) {
4407                 fp_drop(p, fd, fp, 1);
4408                 proc_fdunlock(p);
4409                 return EINVAL;
4410         }
4411         vp = (struct vnode *)fp->f_data;
4412         if (vpp != NULL) {
4413                 *vpp = vp;
4414         }
4415
4416         if ((vidp != NULL) && (vp != NULLVP)) {
4417                 *vidp = (uint32_t)vp->v_id;
4418         }
4419
4420         proc_fdunlock(p);
4421
4422         return 0;
4423 }
4424
4425
4426 /*
4427  * file_socket
4428  *
4429  * Description: Given an fd, look it up in the current process's per process
4430  *              open file table, and return its internal socket pointer.
4431  *
4432  * Parameters:  fd                              fd to obtain vnode from
4433  *              sp                              pointer to socket return area
4434  *
4435  * Returns:     0                               Success
4436  *              ENOTSOCK                        Not a socket
4437  *              fp_lookup:EBADF                 Bad file descriptor
4438  *
4439  * Implicit returns:
4440  *              *sp (modified)                  Returned socket pointer
4441  *
4442  * Locks:       This function internally takes and drops the proc_fdlock for
4443  *              the current process
4444  *
4445  * Notes:       If successful, this function increments the f_iocount on the
4446  *              fd's corresponding fileproc.
4447  *
4448  *              The fileproc referenced is not returned; because of this, care
4449  *              must be taken to not drop the last reference (e.g. by closing
4450  *              the file).  This is inherently unsafe, since the reference may
4451  *              not be recoverable from the socket, if there is a subsequent
4452  *              close that destroys the associate fileproc.  The caller should
4453  *              therefore retain their own reference on the fileproc so that
4454  *              the f_iocount can be dropped subsequently.  Failure to do this
4455  *              can result in the returned pointer immediately becoming invalid
4456  *              following the call.
4457  *
4458  *              Use of this function is discouraged.
4459  */
4460 int
4461 file_socket(int fd, struct socket **sp)
4462 {
4463         proc_t p = current_proc();
4464         struct fileproc *fp;
4465         int error;
4466
4467         proc_fdlock_spin(p);
4468         if ((error = fp_lookup(p, fd, &fp, 1))) {
4469                 proc_fdunlock(p);
4470                 return error;
4471         }
4472         if (fp->f_type != DTYPE_SOCKET) {
4473                 fp_drop(p, fd, fp, 1);
4474                 proc_fdunlock(p);
4475                 return ENOTSOCK;
4476         }
4477         *sp = (struct socket *)fp->f_data;
4478         proc_fdunlock(p);
4479
4480         return 0;
4481 }
4482
4483
4484 /*
4485  * file_flags
4486  *
4487  * Description: Given an fd, look it up in the current process's per process
4488  *              open file table, and return its fileproc's flags field.
4489  *
4490  * Parameters:  fd                              fd whose flags are to be
4491  *                                              retrieved
4492  *              flags                           pointer to flags data area
4493  *
4494  * Returns:     0                               Success
4495  *              ENOTSOCK                        Not a socket
4496  *              fp_lookup:EBADF                 Bad file descriptor
4497  *
4498  * Implicit returns:
4499  *              *flags (modified)               Returned flags field
4500  *
4501  * Locks:       This function internally takes and drops the proc_fdlock for
4502  *              the current process
4503  *
4504  * Notes:       This function will internally increment and decrement the
4505  *              f_iocount of the fileproc as part of its operation.
4506  */
4507 int
4508 file_flags(int fd, int *flags)
4509 {
4510         proc_t p = current_proc();
4511         struct fileproc *fp;
4512         int error;
4513
4514         proc_fdlock_spin(p);
4515         if ((error = fp_lookup(p, fd, &fp, 1))) {
4516                 proc_fdunlock(p);
4517                 return error;
4518         }
4519         *flags = (int)fp->f_flag;
4520         fp_drop(p, fd, fp, 1);
4521         proc_fdunlock(p);
4522
4523         return 0;
4524 }
4525
4526
4527 /*
4528  * file_drop
4529  *
4530  * Description: Drop an iocount reference on an fd, and wake up any waiters
4531  *              for draining (i.e. blocked in fileproc_drain() called during
4532  *              the last attempt to close a file).
4533  *
4534  * Parameters:  fd                              fd on which an ioreference is
4535  *                                              to be dropped
4536  *
4537  * Returns:     0                               Success
4538  *              EBADF                           Bad file descriptor
4539  *
4540  * Description: Given an fd, look it up in the current process's per process
4541  *              open file table, and drop it's fileproc's f_iocount by one
4542  *
4543  * Notes:       This is intended as a corresponding operation to the functions
4544  *              file_vnode() and file_socket() operations.
4545  *
4546  *              Technically, the close reference is supposed to be protected
4547  *              by a fileproc_drain(), however, a drain will only block if
4548  *              the fd refers to a character device, and that device has had
4549  *              preparefileread() called on it.  If it refers to something
4550  *              other than a character device, then the drain will occur and
4551  *              block each close attempt, rather than merely the last close.
4552  *
4553  *              Since it's possible for an fd that refers to a character
4554  *              device to have an intermediate close followed by an open to
4555  *              cause a different file to correspond to that descriptor,
4556  *              unless there was a cautionary reference taken on the fileproc,
4557  *              this is an inherently unsafe function.  This happens in the
4558  *              case where multiple fd's in a process refer to the same
4559  *              character device (e.g. stdin/out/err pointing to a tty, etc.).
4560  *
4561  *              Use of this function is discouraged.
4562  */
4563 int
4564 file_drop(int fd)
4565 {
4566         struct fileproc *fp;
4567         proc_t p = current_proc();
4568         int     needwakeup = 0;
4569
4570         proc_fdlock_spin(p);
4571         if (fd < 0 || fd >= p->p_fd->fd_nfiles ||
4572             (fp = p->p_fd->fd_ofiles[fd]) == NULL ||
4573             ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) &&
4574             !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) {
4575                 proc_fdunlock(p);
4576                 return EBADF;
4577         }
4578
4579         if (1 == os_ref_release_locked(&fp->f_iocount)) {
4580                 if (fp->f_flags & FP_SELCONFLICT) {
4581                         fp->f_flags &= ~FP_SELCONFLICT;
4582                 }
4583
4584                 if (p->p_fpdrainwait) {
4585                         p->p_fpdrainwait = 0;
4586                         needwakeup = 1;
4587                 }
4588         }
4589         proc_fdunlock(p);
4590
4591         if (needwakeup) {
4592                 wakeup(&p->p_fpdrainwait);
4593         }
4594         return 0;
4595 }
4596
4597
4598 static int falloc_withalloc_locked(proc_t, struct fileproc **, int *,
4599     vfs_context_t, struct fileproc * (*)(void *), void *, int);
4600
4601 /*
4602  * falloc
4603  *
4604  * Description: Allocate an entry in the per process open file table and
4605  *              return the corresponding fileproc and fd.
4606  *
4607  * Parameters:  p                               The process in whose open file
4608  *                                              table the fd is to be allocated
4609  *              resultfp                        Pointer to fileproc pointer
4610  *                                              return area
4611  *              resultfd                        Pointer to fd return area
4612  *              ctx                             VFS context
4613  *
4614  * Returns:     0                               Success
4615  *      falloc:ENFILE                           Too many open files in system
4616  *      falloc:EMFILE                           Too many open files in process
4617  *      falloc:ENOMEM                           M_FILEPROC or M_FILEGLOB zone
4618  *                                              exhausted
4619  *
4620  * Implicit returns:
4621  *              *resultfd (modified)            Returned fileproc pointer
4622  *              *resultfd (modified)            Returned fd
4623  *
4624  * Locks:       This function takes and drops the proc_fdlock; if this lock
4625  *              is already held, use falloc_locked() instead.
4626  *
4627  * Notes:       This function takes separate process and context arguments
4628  *              solely to support kern_exec.c; otherwise, it would take
4629  *              neither, and expect falloc_locked() to use the
4630  *              vfs_context_current() routine internally.
4631  */
4632 int
4633 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4634 {
4635         return falloc_withalloc(p, resultfp, resultfd, ctx,
4636                    fileproc_alloc_init, NULL);
4637 }
4638
4639 /*
4640  * Like falloc, but including the fileproc allocator and create-args
4641  */
4642 int
4643 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4644     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg)
4645 {
4646         int error;
4647
4648         proc_fdlock(p);
4649         error = falloc_withalloc_locked(p,
4650             resultfp, resultfd, ctx, fp_zalloc, arg, 1);
4651         proc_fdunlock(p);
4652
4653         return error;
4654 }
4655
/*
 * "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists
 *
 * The object has no initializer, so it is zero-filled.
 * falloc_withalloc_locked() installs it as fg_ops on every fileglob it
 * creates, so fg_ops is never a NULL pointer on a new fileglob (presumably
 * it is replaced once the real file type is known -- confirm against
 * callers).
 */
static const struct fileops uninitops;
4660
4661 /*
4662  * falloc_locked
4663  *
4664  * Create a new open file structure and allocate
4665  * a file descriptor for the process that refers to it.
4666  *
4667  * Returns:     0                       Success
4668  *
4669  * Description: Allocate an entry in the per process open file table and
4670  *              return the corresponding fileproc and fd.
4671  *
4672  * Parameters:  p                               The process in whose open file
4673  *                                              table the fd is to be allocated
4674  *              resultfp                        Pointer to fileproc pointer
4675  *                                              return area
4676  *              resultfd                        Pointer to fd return area
4677  *              ctx                             VFS context
4678  *              locked                          Flag to indicate whether the
4679  *                                              caller holds proc_fdlock
4680  *
4681  * Returns:     0                               Success
4682  *              ENFILE                          Too many open files in system
4683  *              fdalloc:EMFILE                  Too many open files in process
4684  *              ENOMEM                          M_FILEPROC or M_FILEGLOB zone
4685  *                                              exhausted
4686  *      fdalloc:ENOMEM
4687  *
4688  * Implicit returns:
4689  *              *resultfd (modified)            Returned fileproc pointer
4690  *              *resultfd (modified)            Returned fd
4691  *
4692  * Locks:       If the parameter 'locked' is zero, this function takes and
4693  *              drops the proc_fdlock; if non-zero, the caller must hold the
4694  *              lock.
4695  *
4696  * Notes:       If you intend to use a non-zero 'locked' parameter, use the
4697  *              utility function falloc() instead.
4698  *
4699  *              This function takes separate process and context arguments
4700  *              solely to support kern_exec.c; otherwise, it would take
4701  *              neither, and use the vfs_context_current() routine internally.
4702  */
4703 int
4704 falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4705     vfs_context_t ctx, int locked)
4706 {
4707         return falloc_withalloc_locked(p, resultfp, resultfd, ctx,
4708                    fileproc_alloc_init, NULL, locked);
4709 }
4710
4711 static int
4712 falloc_withalloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd,
4713     vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg,
4714     int locked)
4715 {
4716         struct fileproc *fp;
4717         struct fileglob *fg;
4718         int error, nfd;
4719
4720         if (nfiles >= maxfiles) {
4721                 tablefull("file");
4722                 return ENFILE;
4723         }
4724
4725         if (!locked) {
4726                 proc_fdlock(p);
4727         }
4728
4729         if ((error = fdalloc(p, 0, &nfd))) {
4730                 if (!locked) {
4731                         proc_fdunlock(p);
4732                 }
4733                 return error;
4734         }
4735
4736 #if CONFIG_MACF
4737         error = mac_file_check_create(proc_ucred(p));
4738         if (error) {
4739                 if (!locked) {
4740                         proc_fdunlock(p);
4741                 }
4742                 return error;
4743         }
4744 #endif
4745
4746         /*
4747          * Allocate a new file descriptor.
4748          * If the process has file descriptor zero open, add to the list
4749          * of open files at that point, otherwise put it at the front of
4750          * the list of open files.
4751          */
4752         proc_fdunlock(p);
4753
4754         fp = (*fp_zalloc)(crarg);
4755         if (fp == NULL) {
4756                 if (locked) {
4757                         proc_fdlock(p);
4758                 }
4759                 return ENOMEM;
4760         }
4761         MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK);
4762         if (fg == NULL) {
4763                 fileproc_free(fp);
4764                 if (locked) {
4765                         proc_fdlock(p);
4766                 }
4767                 return ENOMEM;
4768         }
4769         bzero(fg, sizeof(struct fileglob));
4770         lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4771
4772         os_ref_retain_locked(&fp->f_iocount);
4773         fg->fg_count = 1;
4774         fg->fg_ops = &uninitops;
4775         fp->f_fglob = fg;
4776 #if CONFIG_MACF
4777         mac_file_label_init(fg);
4778 #endif
4779
4780         kauth_cred_ref(ctx->vc_ucred);
4781
4782         proc_fdlock(p);
4783
4784         fp->f_cred = ctx->vc_ucred;
4785
4786 #if CONFIG_MACF
4787         mac_file_label_associate(fp->f_cred, fg);
4788 #endif
4789
4790         OSAddAtomic(1, &nfiles);
4791
4792         p->p_fd->fd_ofiles[nfd] = fp;
4793
4794         if (!locked) {
4795                 proc_fdunlock(p);
4796         }
4797
4798         if (resultfp) {
4799                 *resultfp = fp;
4800         }
4801         if (resultfd) {
4802                 *resultfd = nfd;
4803         }
4804
4805         return 0;
4806 }
4807
4808
4809 /*
4810  * fg_free
4811  *
4812  * Description: Free a file structure; drop the global open file count, and
4813  *              drop the credential reference, if the fileglob has one, and
4814  *              destroy the instance mutex before freeing
4815  *
4816  * Parameters:  fg                              Pointer to fileglob to be
4817  *                                              freed
4818  *
4819  * Returns:     void
4820  */
4821 void
4822 fg_free(struct fileglob *fg)
4823 {
4824         OSAddAtomic(-1, &nfiles);
4825
4826         if (fg->fg_vn_data) {
4827                 fg_vn_data_free(fg->fg_vn_data);
4828                 fg->fg_vn_data = NULL;
4829         }
4830
4831         if (IS_VALID_CRED(fg->fg_cred)) {
4832                 kauth_cred_unref(&fg->fg_cred);
4833         }
4834         lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
4835
4836 #if CONFIG_MACF
4837         mac_file_label_destroy(fg);
4838 #endif
4839         FREE_ZONE(fg, sizeof *fg, M_FILEGLOB);
4840 }
4841
4842
4843 /*
4844  * fg_get_vnode
4845  *
4846  * Description: Return vnode associated with the file structure, if
4847  *              any.  The lifetime of the returned vnode is bound to
4848  *              the lifetime of the file structure.
4849  *
4850  * Parameters:  fg                              Pointer to fileglob to
4851  *                                              inspect
4852  *
4853  * Returns:     vnode_t
4854  */
4855 vnode_t
4856 fg_get_vnode(struct fileglob *fg)
4857 {
4858         if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
4859                 return (vnode_t)fg->fg_data;
4860         } else {
4861                 return NULL;
4862         }
4863 }
4864
4865 /*
4866  * fdexec
4867  *
4868  * Description: Perform close-on-exec processing for all files in a process
4869  *              that are either marked as close-on-exec, or which were in the
4870  *              process of being opened at the time of the execve
4871  *
 *              Also handles the case (via posix_spawn()) where -all-
 *              files except those marked with "inherit" are treated as
 *              close-on-exec.
4875  *
4876  * Parameters:  p                               Pointer to process calling
4877  *                                              execve
4878  *
4879  * Returns:     void
4880  *
4881  * Locks:       This function internally takes and drops proc_fdlock()
4882  *          But assumes tables don't grow/change while unlocked.
4883  *
4884  */
4885 void
4886 fdexec(proc_t p, short flags, int self_exec)
4887 {
4888         struct filedesc *fdp = p->p_fd;
4889         int i;
4890         boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4891         thread_t self = current_thread();
4892         struct uthread *ut = get_bsdthread_info(self);
4893         struct kqworkq *dealloc_kqwq = NULL;
4894
4895         /*
4896          * If the current thread is bound as a workq/workloop
4897          * servicing thread, we need to unbind it first.
4898          */
4899         if (ut->uu_kqr_bound && self_exec) {
4900                 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4901         }
4902
4903         proc_fdlock(p);
4904
4905         /*
4906          * Deallocate the knotes for this process
4907          * and mark the tables non-existent so
4908          * subsequent kqueue closes go faster.
4909          */
4910         knotes_dealloc(p);
4911         assert(fdp->fd_knlistsize == 0);
4912         assert(fdp->fd_knhashmask == 0);
4913
4914         for (i = fdp->fd_lastfile; i >= 0; i--) {
4915                 struct fileproc *fp = fdp->fd_ofiles[i];
4916                 char *flagp = &fdp->fd_ofileflags[i];
4917
4918                 if (fp && cloexec_default) {
4919                         /*
4920                          * Reverse the usual semantics of file descriptor
4921                          * inheritance - all of them should be closed
4922                          * except files marked explicitly as "inherit" and
4923                          * not marked close-on-exec.
4924                          */
4925                         if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
4926                                 *flagp |= UF_EXCLOSE;
4927                         }
4928                         *flagp &= ~UF_INHERIT;
4929                 }
4930
4931                 if (
4932                         ((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
4933 #if CONFIG_MACF
4934                         || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
4935 #endif
4936                         ) {
4937                         procfdtbl_clearfd(p, i);
4938                         if (i == fdp->fd_lastfile && i > 0) {
4939                                 fdp->fd_lastfile--;
4940                         }
4941                         if (i < fdp->fd_freefile) {
4942                                 fdp->fd_freefile = i;
4943                         }
4944
4945                         /*
4946                          * Wait for any third party viewers (e.g., lsof)
4947                          * to release their references to this fileproc.
4948                          */
4949                         while (os_ref_get_count(&fp->f_iocount) > 1) {
4950                                 p->p_fpdrainwait = 1;
4951                                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
4952                                     "fpdrain", NULL);
4953                         }
4954                         if (fp->f_flags & FP_WAITEVENT) {
4955                                 (void)waitevent_close(p, fp);
4956                         }
4957                         closef_locked(fp, fp->f_fglob, p);
4958
4959                         fileproc_free(fp);
4960                 }
4961         }
4962
4963         /* release the per-process workq kq */
4964         if (fdp->fd_wqkqueue) {
4965                 dealloc_kqwq = fdp->fd_wqkqueue;
4966                 fdp->fd_wqkqueue = NULL;
4967         }
4968
4969         proc_fdunlock(p);
4970
4971         /* Anything to free? */
4972         if (dealloc_kqwq) {
4973                 kqworkq_dealloc(dealloc_kqwq);
4974         }
4975 }
4976
4977
4978 /*
4979  * fdcopy
4980  *
4981  * Description: Copy a filedesc structure.  This is normally used as part of
4982  *              forkproc() when forking a new process, to copy the per process
4983  *              open file table over to the new process.
4984  *
4985  * Parameters:  p                               Process whose open file table
4986  *                                              is to be copied (parent)
4987  *              uth_cdir                        Per thread current working
4988  *                                              cirectory, or NULL
4989  *
4990  * Returns:     NULL                            Copy failed
4991  *              !NULL                           Pointer to new struct filedesc
4992  *
4993  * Locks:       This function internally takes and drops proc_fdlock()
4994  *
4995  * Notes:       Files are copied directly, ignoring the new resource limits
4996  *              for the process that's being copied into.  Since the descriptor
4997  *              references are just additional references, this does not count
4998  *              against the number of open files on the system.
4999  *
5000  *              The struct filedesc includes the current working directory,
5001  *              and the current root directory, if the process is chroot'ed.
5002  *
5003  *              If the exec was called by a thread using a per thread current
5004  *              working directory, we inherit the working directory from the
5005  *              thread making the call, rather than from the process.
5006  *
5007  *              In the case of a failure to obtain a reference, for most cases,
5008  *              the file entry will be silently dropped.  There's an exception
5009  *              for the case of a chroot dir, since a failure to to obtain a
5010  *              reference there would constitute an "escape" from the chroot
5011  *              environment, which must not be allowed.  In that case, we will
5012  *              deny the execve() operation, rather than allowing the escape.
5013  */
5014 struct filedesc *
5015 fdcopy(proc_t p, vnode_t uth_cdir)
5016 {
5017         struct filedesc *newfdp, *fdp = p->p_fd;
5018         int i;
5019         struct fileproc *ofp, *fp;
5020         vnode_t v_dir;
5021
5022         MALLOC_ZONE(newfdp, struct filedesc *,
5023             sizeof(*newfdp), M_FILEDESC, M_WAITOK);
5024         if (newfdp == NULL) {
5025                 return NULL;
5026         }
5027
5028         proc_fdlock(p);
5029
5030         /*
5031          * the FD_CHROOT flag will be inherited via this copy
5032          */
5033         (void) memcpy(newfdp, fdp, sizeof(*newfdp));
5034
5035         /*
5036          * If we are running with per-thread current working directories,
5037          * inherit the new current working directory from the current thread
5038          * instead, before we take our references.
5039          */
5040         if (uth_cdir != NULLVP) {
5041                 newfdp->fd_cdir = uth_cdir;
5042         }
5043
5044         /*
5045          * For both fd_cdir and fd_rdir make sure we get
5046          * a valid reference... if we can't, than set
5047          * set the pointer(s) to NULL in the child... this
5048          * will keep us from using a non-referenced vp
5049          * and allows us to do the vnode_rele only on
5050          * a properly referenced vp
5051          */
5052         if ((v_dir = newfdp->fd_cdir)) {
5053                 if (vnode_getwithref(v_dir) == 0) {
5054                         if ((vnode_ref(v_dir))) {
5055                                 newfdp->fd_cdir = NULL;
5056                         }
5057                         vnode_put(v_dir);
5058                 } else {
5059                         newfdp->fd_cdir = NULL;
5060                 }
5061         }
5062         if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
5063                 /*
5064                  * we couldn't get a new reference on
5065                  * the current working directory being
5066                  * inherited... we might as well drop
5067                  * our reference from the parent also
5068                  * since the vnode has gone DEAD making
5069                  * it useless... by dropping it we'll
5070                  * be that much closer to recycling it
5071                  */
5072                 vnode_rele(fdp->fd_cdir);
5073                 fdp->fd_cdir = NULL;
5074         }
5075
5076         if ((v_dir = newfdp->fd_rdir)) {
5077                 if (vnode_getwithref(v_dir) == 0) {
5078                         if ((vnode_ref(v_dir))) {
5079                                 newfdp->fd_rdir = NULL;
5080                         }
5081                         vnode_put(v_dir);
5082                 } else {
5083                         newfdp->fd_rdir = NULL;
5084                 }
5085         }
5086         /* Coming from a chroot environment and unable to get a reference... */
5087         if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
5088                 proc_fdunlock(p);
5089                 /*
5090                  * We couldn't get a new reference on
5091                  * the chroot directory being
5092                  * inherited... this is fatal, since
5093                  * otherwise it would constitute an
5094                  * escape from a chroot environment by
5095                  * the new process.
5096                  */
5097                 if (newfdp->fd_cdir) {
5098                         vnode_rele(newfdp->fd_cdir);
5099                 }
5100                 FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC);
5101                 return NULL;
5102         }
5103
5104         /*
5105          * If the number of open files fits in the internal arrays
5106          * of the open file structure, use them, otherwise allocate
5107          * additional memory for the number of descriptors currently
5108          * in use.
5109          */
5110         if (newfdp->fd_lastfile < NDFILE) {
5111                 i = NDFILE;
5112         } else {
5113                 /*
5114                  * Compute the smallest multiple of NDEXTENT needed
5115                  * for the file descriptors currently in use,
5116                  * allowing the table to shrink.
5117                  */
5118                 i = newfdp->fd_nfiles;
5119                 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
5120                         i /= 2;
5121                 }
5122         }
5123         proc_fdunlock(p);
5124
5125         MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **,
5126             i * OFILESIZE, M_OFILETABL, M_WAITOK);
5127         if (newfdp->fd_ofiles == NULL) {
5128                 if (newfdp->fd_cdir) {
5129                         vnode_rele(newfdp->fd_cdir);
5130                 }
5131                 if (newfdp->fd_rdir) {
5132                         vnode_rele(newfdp->fd_rdir);
5133                 }
5134
5135                 FREE_ZONE(newfdp, sizeof(*newfdp), M_FILEDESC);
5136                 return NULL;
5137         }
5138         (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
5139         proc_fdlock(p);
5140
5141         newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
5142         newfdp->fd_nfiles = i;
5143
5144         if (fdp->fd_nfiles > 0) {
5145                 struct fileproc **fpp;
5146                 char *flags;
5147
5148                 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
5149                     (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
5150                 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
5151                     (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
5152
5153                 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
5154                 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
5155                 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
5156                         if ((ofp = *fpp) != NULL &&
5157                             0 == (ofp->f_fglob->fg_lflags & FG_CONFINED) &&
5158                             0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
5159 #if DEBUG
5160                                 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
5161                                         panic("complex fileproc");
5162                                 }
5163 #endif
5164                                 fp = fileproc_alloc_init(NULL);
5165                                 if (fp == NULL) {
5166                                         /*
5167                                          * XXX no room to copy, unable to
5168                                          * XXX safely unwind state at present
5169                                          */
5170                                         *fpp = NULL;
5171                                 } else {
5172                                         fp->f_flags |=
5173                                             (ofp->f_flags & ~FP_TYPEMASK);
5174                                         fp->f_fglob = ofp->f_fglob;
5175                                         (void)fg_ref(fp);
5176                                         *fpp = fp;
5177                                 }
5178                         } else {
5179                                 *fpp = NULL;
5180                                 *flags = 0;
5181                         }
5182                         if (*fpp == NULL) {
5183                                 if (i == newfdp->fd_lastfile && i > 0) {
5184                                         newfdp->fd_lastfile--;
5185                                 }
5186                                 if (i < newfdp->fd_freefile) {
5187                                         newfdp->fd_freefile = i;
5188                                 }
5189                         }
5190                 }
5191         }
5192
5193         proc_fdunlock(p);
5194
5195         /*
5196          * Initialize knote and kqueue tracking structs
5197          */
5198         newfdp->fd_knlist = NULL;
5199         newfdp->fd_knlistsize = 0;
5200         newfdp->fd_knhash = NULL;
5201         newfdp->fd_knhashmask = 0;
5202         newfdp->fd_kqhash = NULL;
5203         newfdp->fd_kqhashmask = 0;
5204         newfdp->fd_wqkqueue = NULL;
5205         lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
5206         lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
5207
5208         return newfdp;
5209 }
5210
5211
5212 /*
5213  * fdfree
5214  *
5215  * Description: Release a filedesc (per process open file table) structure;
5216  *              this is done on process exit(), or from forkproc_free() if
5217  *              the fork fails for some reason subsequent to a successful
5218  *              call to fdcopy()
5219  *
5220  * Parameters:  p                               Pointer to process going away
5221  *
5222  * Returns:     void
5223  *
5224  * Locks:       This function internally takes and drops proc_fdlock()
5225  */
5226 void
5227 fdfree(proc_t p)
5228 {
5229         struct filedesc *fdp;
5230         struct fileproc *fp;
5231         struct kqworkq *dealloc_kqwq = NULL;
5232         int i;
5233
5234         proc_fdlock(p);
5235
5236         if (p == kernproc || NULL == (fdp = p->p_fd)) {
5237                 proc_fdunlock(p);
5238                 return;
5239         }
5240
5241         extern struct filedesc filedesc0;
5242
5243         if (&filedesc0 == fdp) {
5244                 panic("filedesc0");
5245         }
5246
5247         /*
5248          * deallocate all the knotes up front and claim empty
5249          * tables to make any subsequent kqueue closes faster.
5250          */
5251         knotes_dealloc(p);
5252         assert(fdp->fd_knlistsize == 0);
5253         assert(fdp->fd_knhashmask == 0);
5254
5255         /*
5256          * dealloc all workloops that have outstanding retains
5257          * when created with scheduling parameters.
5258          */
5259         kqworkloops_dealloc(p);
5260
5261         /* close file descriptors */
5262         if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
5263                 for (i = fdp->fd_lastfile; i >= 0; i--) {
5264                         if ((fp = fdp->fd_ofiles[i]) != NULL) {
5265                                 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
5266                                         panic("fdfree: found fp with UF_RESERVED");
5267                                 }
5268
5269                                 fileproc_drain(p, fp);
5270                                 procfdtbl_reservefd(p, i);
5271
5272                                 if (fp->f_flags & FP_WAITEVENT) {
5273                                         (void)waitevent_close(p, fp);
5274                                 }
5275                                 (void) closef_locked(fp, fp->f_fglob, p);
5276                                 fileproc_free(fp);
5277                         }
5278                 }
5279                 FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL);
5280                 fdp->fd_ofiles = NULL;
5281                 fdp->fd_nfiles = 0;
5282         }
5283
5284         if (fdp->fd_wqkqueue) {
5285                 dealloc_kqwq = fdp->fd_wqkqueue;
5286                 fdp->fd_wqkqueue = NULL;
5287         }
5288
5289         proc_fdunlock(p);
5290
5291         if (dealloc_kqwq) {
5292                 kqworkq_dealloc(dealloc_kqwq);
5293         }
5294         if (fdp->fd_cdir) {
5295                 vnode_rele(fdp->fd_cdir);
5296         }
5297         if (fdp->fd_rdir) {
5298                 vnode_rele(fdp->fd_rdir);
5299         }
5300
5301         proc_fdlock_spin(p);
5302         p->p_fd = NULL;
5303         proc_fdunlock(p);
5304
5305         if (fdp->fd_kqhash) {
5306                 for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
5307                         assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
5308                 }
5309                 FREE(fdp->fd_kqhash, M_KQUEUE);
5310         }
5311
5312         lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
5313         lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
5314
5315         FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC);
5316 }
5317
5318 /*
5319  * closef_locked
5320  *
5321  * Description: Internal form of closef; called with proc_fdlock held
5322  *
5323  * Parameters:  fp                      Pointer to fileproc for fd
5324  *              fg                      Pointer to fileglob for fd
5325  *              p                       Pointer to proc structure
5326  *
5327  * Returns:     0                       Success
5328  *      closef_finish:???               Anything returnable by a per-fileops
5329  *                                      close function
5330  *
5331  * Note:        Decrements reference count on file structure; if this was the
5332  *              last reference, then closef_finish() is called
5333  *
5334  *              p and fp are allowed to  be NULL when closing a file that was
5335  *              being passed in a message (but only if we are called when this
5336  *              is NOT the last reference).
5337  */
5338 int
5339 closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
5340 {
5341         struct vnode *vp;
5342         struct flock lf;
5343         struct vfs_context context;
5344         int error;
5345
5346         if (fg == NULL) {
5347                 return 0;
5348         }
5349
5350         /* Set up context with cred stashed in fg */
5351         if (p == current_proc()) {
5352                 context.vc_thread = current_thread();
5353         } else {
5354                 context.vc_thread = NULL;
5355         }
5356         context.vc_ucred = fg->fg_cred;
5357
5358         /*
5359          * POSIX record locking dictates that any close releases ALL
5360          * locks owned by this process.  This is handled by setting
5361          * a flag in the unlock to free ONLY locks obeying POSIX
5362          * semantics, and not to free BSD-style file locks.
5363          * If the descriptor was in a message, POSIX-style locks
5364          * aren't passed with the descriptor.
5365          */
5366         if (p && (p->p_ladvflag & P_LADVLOCK) &&
5367             DTYPE_VNODE == FILEGLOB_DTYPE(fg)) {
5368                 proc_fdunlock(p);
5369
5370                 lf.l_whence = SEEK_SET;
5371                 lf.l_start = 0;
5372                 lf.l_len = 0;
5373                 lf.l_type = F_UNLCK;
5374                 vp = (struct vnode *)fg->fg_data;
5375
5376                 if ((error = vnode_getwithref(vp)) == 0) {
5377                         (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
5378                         (void)vnode_put(vp);
5379                 }
5380                 proc_fdlock(p);
5381         }
5382         lck_mtx_lock_spin(&fg->fg_lock);
5383         fg->fg_count--;
5384
5385         if (fg->fg_count > 0) {
5386                 lck_mtx_unlock(&fg->fg_lock);
5387                 return 0;
5388         }
5389 #if DIAGNOSTIC
5390         if (fg->fg_count != 0) {
5391                 panic("fg %p: being freed with bad fg_count (%d)", fg, fg->fg_count);
5392         }
5393 #endif
5394
5395         if (fp && (fp->f_flags & FP_WRITTEN)) {
5396                 fg->fg_flag |= FWASWRITTEN;
5397         }
5398
5399         fg->fg_lflags |= FG_TERM;
5400         lck_mtx_unlock(&fg->fg_lock);
5401
5402         if (p) {
5403                 proc_fdunlock(p);
5404         }
5405
5406         /* Since we ensure that fg->fg_ops is always initialized,
5407          * it is safe to invoke fo_close on the fg */
5408         error = fo_close(fg, &context);
5409
5410         fg_free(fg);
5411
5412         if (p) {
5413                 proc_fdlock(p);
5414         }
5415
5416         return error;
5417 }
5418
5419
5420 /*
5421  * fileproc_drain
5422  *
5423  * Description: Drain out pending I/O operations
5424  *
5425  * Parameters:  p                               Process closing this file
5426  *              fp                              fileproc struct for the open
5427  *                                              instance on the file
5428  *
5429  * Returns:     void
5430  *
5431  * Locks:       Assumes the caller holds the proc_fdlock
5432  *
5433  * Notes:       For character devices, this occurs on the last close of the
5434  *              device; for all other file descriptors, this occurs on each
5435  *              close to prevent fd's from being closed out from under
5436  *              operations currently in progress and blocked
5437  *
5438  * See Also:    file_vnode(), file_socket(), file_drop(), and the cautions
5439  *              regarding their use and interaction with this function.
5440  */
5441 void
5442 fileproc_drain(proc_t p, struct fileproc * fp)
5443 {
5444         struct vfs_context context;
5445
5446         context.vc_thread = proc_thread(p);     /* XXX */
5447         context.vc_ucred = fp->f_fglob->fg_cred;
5448
5449         /* Set the vflag for drain */
5450         fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
5451
5452         while (os_ref_get_count(&fp->f_iocount) > 1) {
5453                 lck_mtx_convert_spin(&p->p_fdmlock);
5454
5455                 fo_drain(fp, &context);
5456                 if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
5457                         if (waitq_wakeup64_all((struct waitq *)fp->f_wset, NO_EVENT64,
5458                             THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5459                                 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->f_wset, fp);
5460                         }
5461                 }
5462                 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5463                         if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5464                             THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5465                                 panic("bad select_conflict_queue");
5466                         }
5467                 }
5468                 p->p_fpdrainwait = 1;
5469
5470                 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5471         }
5472 #if DIAGNOSTIC
5473         if ((fp->f_flags & FP_INSELECT) != 0) {
5474                 panic("FP_INSELECT set on drained fp");
5475         }
5476 #endif
5477         if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5478                 fp->f_flags &= ~FP_SELCONFLICT;
5479         }
5480 }
5481
5482
5483 /*
5484  * fp_free
5485  *
5486  * Description: Release the fd and free the fileproc associated with the fd
5487  *              in the per process open file table of the specified process;
5488  *              these values must correspond.
5489  *
5490  * Parameters:  p                               Process containing fd
5491  *              fd                              fd to be released
5492  *              fp                              fileproc to be freed
5493  */
5494 void
5495 fp_free(proc_t p, int fd, struct fileproc * fp)
5496 {
5497         proc_fdlock_spin(p);
5498         fdrelse(p, fd);
5499         proc_fdunlock(p);
5500
5501         fg_free(fp->f_fglob);
5502         os_ref_release_live(&fp->f_iocount);
5503         fileproc_free(fp);
5504 }
5505
5506
5507 /*
5508  * flock
5509  *
5510  * Description: Apply an advisory lock on a file descriptor.
5511  *
5512  * Parameters:  p                               Process making request
5513  *              uap->fd                         fd on which the lock is to be
5514  *                                              attempted
5515  *              uap->how                        (Un)Lock bits, including type
5516  *              retval                          Pointer to the call return area
5517  *
5518  * Returns:     0                               Success
5519  *      fp_getfvp:EBADF                         Bad file descriptor
5520  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5521  *      vnode_getwithref:???
5522  *      VNOP_ADVLOCK:???
5523  *
5524  * Implicit returns:
5525  *              *retval (modified)              Size of dtable
5526  *
5527  * Notes:       Just attempt to get a record lock of the requested type on
5528  *              the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5529  */
5530 int
5531 flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5532 {
5533         int fd = uap->fd;
5534         int how = uap->how;
5535         struct fileproc *fp;
5536         struct vnode *vp;
5537         struct flock lf;
5538         vfs_context_t ctx = vfs_context_current();
5539         int error = 0;
5540
5541         AUDIT_ARG(fd, uap->fd);
5542         if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5543                 return error;
5544         }
5545         if ((error = vnode_getwithref(vp))) {
5546                 goto out1;
5547         }
5548         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5549
5550         lf.l_whence = SEEK_SET;
5551         lf.l_start = 0;
5552         lf.l_len = 0;
5553         if (how & LOCK_UN) {
5554                 lf.l_type = F_UNLCK;
5555                 fp->f_flag &= ~FHASLOCK;
5556                 error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5557                 goto out;
5558         }
5559         if (how & LOCK_EX) {
5560                 lf.l_type = F_WRLCK;
5561         } else if (how & LOCK_SH) {
5562                 lf.l_type = F_RDLCK;
5563         } else {
5564                 error = EBADF;
5565                 goto out;
5566         }
5567 #if CONFIG_MACF
5568         error = mac_file_check_lock(proc_ucred(p), fp->f_fglob, F_SETLK, &lf);
5569         if (error) {
5570                 goto out;
5571         }
5572 #endif
5573         error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf,
5574             (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5575             ctx, NULL);
5576         if (!error) {
5577                 fp->f_flag |= FHASLOCK;
5578         }
5579 out:
5580         (void)vnode_put(vp);
5581 out1:
5582         fp_drop(p, fd, fp, 0);
5583         return error;
5584 }
5585
5586 /*
5587  * fileport_makeport
5588  *
5589  * Description: Obtain a Mach send right for a given file descriptor.
5590  *
5591  * Parameters:  p               Process calling fileport
5592  *              uap->fd         The fd to reference
5593  *              uap->portnamep  User address at which to place port name.
5594  *
5595  * Returns:     0               Success.
5596  *              EBADF           Bad file descriptor.
5597  *              EINVAL          File descriptor had type that cannot be sent, misc. other errors.
5598  *              EFAULT          Address at which to store port name is not valid.
5599  *              EAGAIN          Resource shortage.
5600  *
5601  * Implicit returns:
5602  *              On success, name of send right is stored at user-specified address.
5603  */
5604 int
5605 fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5606     __unused int *retval)
5607 {
5608         int err;
5609         int fd = uap->fd;
5610         user_addr_t user_portaddr = uap->portnamep;
5611         struct fileproc *fp = FILEPROC_NULL;
5612         struct fileglob *fg = NULL;
5613         ipc_port_t fileport;
5614         mach_port_name_t name = MACH_PORT_NULL;
5615
5616         proc_fdlock(p);
5617         err = fp_lookup(p, fd, &fp, 1);
5618         if (err != 0) {
5619                 goto out_unlock;
5620         }
5621
5622         if (!file_issendable(p, fp)) {
5623                 err = EINVAL;
5624                 goto out_unlock;
5625         }
5626
5627         if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5628                 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5629                 goto out_unlock;
5630         }
5631
5632         /* Dropped when port is deallocated */
5633         fg = fp->f_fglob;
5634         fg_ref(fp);
5635
5636         proc_fdunlock(p);
5637
5638         /* Allocate and initialize a port */
5639         fileport = fileport_alloc(fg);
5640         if (fileport == IPC_PORT_NULL) {
5641                 err = EAGAIN;
5642                 fg_drop(fp);
5643                 goto out;
5644         }
5645
5646         /* Add an entry.  Deallocates port on failure. */
5647         name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5648         if (!MACH_PORT_VALID(name)) {
5649                 err = EINVAL;
5650                 goto out;
5651         }
5652
5653         err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5654         if (err != 0) {
5655                 goto out;
5656         }
5657
5658         /* Tag the fileglob for debugging purposes */
5659         lck_mtx_lock_spin(&fg->fg_lock);
5660         fg->fg_lflags |= FG_PORTMADE;
5661         lck_mtx_unlock(&fg->fg_lock);
5662
5663         fp_drop(p, fd, fp, 0);
5664
5665         return 0;
5666
5667 out_unlock:
5668         proc_fdunlock(p);
5669 out:
5670         if (MACH_PORT_VALID(name)) {
5671                 /* Don't care if another thread races us to deallocate the entry */
5672                 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5673         }
5674
5675         if (fp != FILEPROC_NULL) {
5676                 fp_drop(p, fd, fp, 0);
5677         }
5678
5679         return err;
5680 }
5681
5682 void
5683 fileport_releasefg(struct fileglob *fg)
5684 {
5685         (void)closef_locked(NULL, fg, PROC_NULL);
5686
5687         return;
5688 }
5689
5690 /*
5691  * fileport_makefd_internal
5692  *
5693  * Description: Obtain the file descriptor for a given Mach send right.
5694  *
5695  * Returns:     0               Success
5696  *              EINVAL          Invalid Mach port name, or port is not for a file.
5697  *      fdalloc:EMFILE
5698  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
5699  *
5700  * Implicit returns:
5701  *              *retval (modified)              The new descriptor
5702  */
5703 int
5704 fileport_makefd_internal(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5705 {
5706         struct fileglob *fg;
5707         struct fileproc *fp = FILEPROC_NULL;
5708         int fd;
5709         int err;
5710
5711         fg = fileport_port_to_fileglob(port);
5712         if (fg == NULL) {
5713                 err = EINVAL;
5714                 goto out;
5715         }
5716
5717         fp = fileproc_alloc_init(NULL);
5718         if (fp == FILEPROC_NULL) {
5719                 err = ENOMEM;
5720                 goto out;
5721         }
5722
5723         fp->f_fglob = fg;
5724         fg_ref(fp);
5725
5726         proc_fdlock(p);
5727         err = fdalloc(p, 0, &fd);
5728         if (err != 0) {
5729                 proc_fdunlock(p);
5730                 fg_drop(fp);
5731                 goto out;
5732         }
5733         if (uf_flags) {
5734                 *fdflags(p, fd) |= uf_flags;
5735         }
5736
5737         procfdtbl_releasefd(p, fd, fp);
5738         proc_fdunlock(p);
5739
5740         *retval = fd;
5741         err = 0;
5742 out:
5743         if ((fp != NULL) && (0 != err)) {
5744                 fileproc_free(fp);
5745         }
5746
5747         return err;
5748 }
5749
5750 /*
5751  * fileport_makefd
5752  *
5753  * Description: Obtain the file descriptor for a given Mach send right.
5754  *
5755  * Parameters:  p               Process calling fileport
5756  *              uap->port       Name of send right to file port.
5757  *
5758  * Returns:     0               Success
5759  *              EINVAL          Invalid Mach port name, or port is not for a file.
5760  *      fdalloc:EMFILE
5761  *      fdalloc:ENOMEM          Unable to allocate fileproc or extend file table.
5762  *
5763  * Implicit returns:
5764  *              *retval (modified)              The new descriptor
5765  */
5766 int
5767 fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5768 {
5769         ipc_port_t port = IPC_PORT_NULL;
5770         mach_port_name_t send = uap->port;
5771         kern_return_t res;
5772         int err;
5773
5774         res = ipc_object_copyin(get_task_ipcspace(p->task),
5775             send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5776
5777         if (res == KERN_SUCCESS) {
5778                 err = fileport_makefd_internal(p, port, UF_EXCLOSE, retval);
5779         } else {
5780                 err = EINVAL;
5781         }
5782
5783         if (IPC_PORT_NULL != port) {
5784                 ipc_port_release_send(port);
5785         }
5786
5787         return err;
5788 }
5789
5790
5791 /*
5792  * dupfdopen
5793  *
5794  * Description: Duplicate the specified descriptor to a free descriptor;
5795  *              this is the second half of fdopen(), above.
5796  *
5797  * Parameters:  fdp                             filedesc pointer to fill in
5798  *              indx                            fd to dup to
5799  *              dfd                             fd to dup from
5800  *              mode                            mode to set on new fd
5801  *              error                           command code
5802  *
5803  * Returns:     0                               Success
5804  *              EBADF                           Source fd is bad
5805  *              EACCES                          Requested mode not allowed
5806  *              !0                              'error', if not ENODEV or
5807  *                                              ENXIO
5808  *
5809  * Notes:       XXX This is not thread safe; see fdopen() above
5810  */
5811 int
5812 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5813 {
5814         struct fileproc *wfp;
5815         struct fileproc *fp;
5816 #if CONFIG_MACF
5817         int myerror;
5818 #endif
5819         proc_t p = current_proc();
5820
5821         /*
5822          * If the to-be-dup'd fd number is greater than the allowed number
5823          * of file descriptors, or the fd to be dup'd has already been
5824          * closed, reject.  Note, check for new == old is necessary as
5825          * falloc could allocate an already closed to-be-dup'd descriptor
5826          * as the new descriptor.
5827          */
5828         proc_fdlock(p);
5829
5830         fp = fdp->fd_ofiles[indx];
5831         if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5832             (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5833             (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5834                 proc_fdunlock(p);
5835                 return EBADF;
5836         }
5837 #if CONFIG_MACF
5838         myerror = mac_file_check_dup(proc_ucred(p), wfp->f_fglob, dfd);
5839         if (myerror) {
5840                 proc_fdunlock(p);
5841                 return myerror;
5842         }
5843 #endif
5844         /*
5845          * There are two cases of interest here.
5846          *
5847          * For ENODEV simply dup (dfd) to file descriptor
5848          * (indx) and return.
5849          *
5850          * For ENXIO steal away the file structure from (dfd) and
5851          * store it in (indx).  (dfd) is effectively closed by
5852          * this operation.
5853          *
5854          * Any other error code is just returned.
5855          */
5856         switch (error) {
5857         case ENODEV:
5858                 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5859                         proc_fdunlock(p);
5860                         return EPERM;
5861                 }
5862
5863                 /*
5864                  * Check that the mode the file is being opened for is a
5865                  * subset of the mode of the existing descriptor.
5866                  */
5867                 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5868                         proc_fdunlock(p);
5869                         return EACCES;
5870                 }
5871                 if (indx > fdp->fd_lastfile) {
5872                         fdp->fd_lastfile = indx;
5873                 }
5874                 (void)fg_ref(wfp);
5875
5876                 if (fp->f_fglob) {
5877                         fg_free(fp->f_fglob);
5878                 }
5879                 fp->f_fglob = wfp->f_fglob;
5880
5881                 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5882                     (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5883
5884                 proc_fdunlock(p);
5885                 return 0;
5886
5887         default:
5888                 proc_fdunlock(p);
5889                 return error;
5890         }
5891         /* NOTREACHED */
5892 }
5893
5894
5895 /*
5896  * fg_ref
5897  *
5898  * Description: Add a reference to a fileglob by fileproc
5899  *
5900  * Parameters:  fp                              fileproc containing fileglob
5901  *                                              pointer
5902  *
5903  * Returns:     void
5904  *
5905  * Notes:       XXX Should use OSAddAtomic?
5906  */
5907 void
5908 fg_ref(struct fileproc * fp)
5909 {
5910         struct fileglob *fg;
5911
5912         fg = fp->f_fglob;
5913
5914         lck_mtx_lock_spin(&fg->fg_lock);
5915
5916 #if DIAGNOSTIC
5917         if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0) {
5918                 panic("fg_ref: invalid bits on fp %p", fp);
5919         }
5920
5921         if (fg->fg_count == 0) {
5922                 panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
5923                     fp, fg);
5924         }
5925 #endif
5926         fg->fg_count++;
5927         lck_mtx_unlock(&fg->fg_lock);
5928 }
5929
5930
5931 /*
5932  * fg_drop
5933  *
5934  * Description: Remove a reference to a fileglob by fileproc
5935  *
5936  * Parameters:  fp                              fileproc containing fileglob
5937  *                                              pointer
5938  *
5939  * Returns:     void
5940  *
5941  * Notes:       XXX Should use OSAddAtomic?
5942  */
5943 void
5944 fg_drop(struct fileproc * fp)
5945 {
5946         struct fileglob *fg;
5947
5948         fg = fp->f_fglob;
5949         lck_mtx_lock_spin(&fg->fg_lock);
5950         fg->fg_count--;
5951         lck_mtx_unlock(&fg->fg_lock);
5952 }
5953
5954 #if SOCKETS
5955 /*
5956  * fg_insertuipc_mark
5957  *
5958  * Description: Mark fileglob for insertion onto message queue if needed
5959  *              Also takes fileglob reference
5960  *
5961  * Parameters:  fg      Fileglob pointer to insert
5962  *
5963  * Returns:     true, if the fileglob needs to be inserted onto msg queue
5964  *
5965  * Locks:       Takes and drops fg_lock, potentially many times
5966  */
5967 boolean_t
5968 fg_insertuipc_mark(struct fileglob * fg)
5969 {
5970         boolean_t insert = FALSE;
5971
5972         lck_mtx_lock_spin(&fg->fg_lock);
5973         while (fg->fg_lflags & FG_RMMSGQ) {
5974                 lck_mtx_convert_spin(&fg->fg_lock);
5975
5976                 fg->fg_lflags |= FG_WRMMSGQ;
5977                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
5978         }
5979
5980         fg->fg_count++;
5981         fg->fg_msgcount++;
5982         if (fg->fg_msgcount == 1) {
5983                 fg->fg_lflags |= FG_INSMSGQ;
5984                 insert = TRUE;
5985         }
5986         lck_mtx_unlock(&fg->fg_lock);
5987         return insert;
5988 }
5989
5990 /*
5991  * fg_insertuipc
5992  *
5993  * Description: Insert marked fileglob onto message queue
5994  *
5995  * Parameters:  fg      Fileglob pointer to insert
5996  *
5997  * Returns:     void
5998  *
5999  * Locks:       Takes and drops fg_lock & uipc_lock
6000  *              DO NOT call this function with proc_fdlock held as unp_gc()
6001  *              can potentially try to acquire proc_fdlock, which can result
6002  *              in a deadlock if this function is in unp_gc_wait().
6003  */
6004 void
6005 fg_insertuipc(struct fileglob * fg)
6006 {
6007         if (fg->fg_lflags & FG_INSMSGQ) {
6008                 lck_mtx_lock_spin(uipc_lock);
6009                 unp_gc_wait();
6010                 LIST_INSERT_HEAD(&fmsghead, fg, f_msglist);
6011                 lck_mtx_unlock(uipc_lock);
6012                 lck_mtx_lock(&fg->fg_lock);
6013                 fg->fg_lflags &= ~FG_INSMSGQ;
6014                 if (fg->fg_lflags & FG_WINSMSGQ) {
6015                         fg->fg_lflags &= ~FG_WINSMSGQ;
6016                         wakeup(&fg->fg_lflags);
6017                 }
6018                 lck_mtx_unlock(&fg->fg_lock);
6019         }
6020 }
6021
6022 /*
6023  * fg_removeuipc_mark
6024  *
6025  * Description: Mark the fileglob for removal from message queue if needed
6026  *              Also releases fileglob message queue reference
6027  *
6028  * Parameters:  fg      Fileglob pointer to remove
6029  *
6030  * Returns:     true, if the fileglob needs to be removed from msg queue
6031  *
6032  * Locks:       Takes and drops fg_lock, potentially many times
6033  */
6034 boolean_t
6035 fg_removeuipc_mark(struct fileglob * fg)
6036 {
6037         boolean_t remove = FALSE;
6038
6039         lck_mtx_lock_spin(&fg->fg_lock);
6040         while (fg->fg_lflags & FG_INSMSGQ) {
6041                 lck_mtx_convert_spin(&fg->fg_lock);
6042
6043                 fg->fg_lflags |= FG_WINSMSGQ;
6044                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
6045         }
6046         fg->fg_msgcount--;
6047         if (fg->fg_msgcount == 0) {
6048                 fg->fg_lflags |= FG_RMMSGQ;
6049                 remove = TRUE;
6050         }
6051         lck_mtx_unlock(&fg->fg_lock);
6052         return remove;
6053 }
6054
6055 /*
6056  * fg_removeuipc
6057  *
6058  * Description: Remove marked fileglob from message queue
6059  *
6060  * Parameters:  fg      Fileglob pointer to remove
6061  *
6062  * Returns:     void
6063  *
6064  * Locks:       Takes and drops fg_lock & uipc_lock
6065  *              DO NOT call this function with proc_fdlock held as unp_gc()
6066  *              can potentially try to acquire proc_fdlock, which can result
6067  *              in a deadlock if this function is in unp_gc_wait().
6068  */
6069 void
6070 fg_removeuipc(struct fileglob * fg)
6071 {
6072         if (fg->fg_lflags & FG_RMMSGQ) {
6073                 lck_mtx_lock_spin(uipc_lock);
6074                 unp_gc_wait();
6075                 LIST_REMOVE(fg, f_msglist);
6076                 lck_mtx_unlock(uipc_lock);
6077                 lck_mtx_lock(&fg->fg_lock);
6078                 fg->fg_lflags &= ~FG_RMMSGQ;
6079                 if (fg->fg_lflags & FG_WRMMSGQ) {
6080                         fg->fg_lflags &= ~FG_WRMMSGQ;
6081                         wakeup(&fg->fg_lflags);
6082                 }
6083                 lck_mtx_unlock(&fg->fg_lock);
6084         }
6085 }
6086 #endif /* SOCKETS */
6087
6088 /*
6089  * fo_read
6090  *
6091  * Description: Generic fileops read indirected through the fileops pointer
6092  *              in the fileproc structure
6093  *
6094  * Parameters:  fp                              fileproc structure pointer
6095  *              uio                             user I/O structure pointer
6096  *              flags                           FOF_ flags
6097  *              ctx                             VFS context for operation
6098  *
6099  * Returns:     0                               Success
6100  *              !0                              Errno from read
6101  */
6102 int
6103 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6104 {
6105         return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
6106 }
6107
6108 int
6109 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6110 {
6111 #pragma unused(fp, uio, flags, ctx)
6112         return ENXIO;
6113 }
6114
6115
6116 /*
6117  * fo_write
6118  *
6119  * Description: Generic fileops write indirected through the fileops pointer
6120  *              in the fileproc structure
6121  *
6122  * Parameters:  fp                              fileproc structure pointer
6123  *              uio                             user I/O structure pointer
6124  *              flags                           FOF_ flags
6125  *              ctx                             VFS context for operation
6126  *
6127  * Returns:     0                               Success
6128  *              !0                              Errno from write
6129  */
6130 int
6131 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6132 {
6133         return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
6134 }
6135
6136 int
6137 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
6138 {
6139 #pragma unused(fp, uio, flags, ctx)
6140         return ENXIO;
6141 }
6142
6143
6144 /*
6145  * fo_ioctl
6146  *
6147  * Description: Generic fileops ioctl indirected through the fileops pointer
6148  *              in the fileproc structure
6149  *
6150  * Parameters:  fp                              fileproc structure pointer
6151  *              com                             ioctl command
6152  *              data                            pointer to internalized copy
6153  *                                              of user space ioctl command
6154  *                                              parameter data in kernel space
6155  *              ctx                             VFS context for operation
6156  *
6157  * Returns:     0                               Success
6158  *              !0                              Errno from ioctl
6159  *
6160  * Locks:       The caller is assumed to have held the proc_fdlock; this
6161  *              function releases and reacquires this lock.  If the caller
6162  *              accesses data protected by this lock prior to calling this
6163  *              function, it will need to revalidate/reacquire any cached
6164  *              protected data obtained prior to the call.
6165  */
6166 int
6167 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6168 {
6169         int error;
6170
6171         proc_fdunlock(vfs_context_proc(ctx));
6172         error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
6173         proc_fdlock(vfs_context_proc(ctx));
6174         return error;
6175 }
6176
6177 int
6178 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
6179 {
6180 #pragma unused(fp, com, data, ctx)
6181         return ENOTTY;
6182 }
6183
6184
6185 /*
6186  * fo_select
6187  *
6188  * Description: Generic fileops select indirected through the fileops pointer
6189  *              in the fileproc structure
6190  *
6191  * Parameters:  fp                              fileproc structure pointer
6192  *              which                           select which
6193  *              wql                             pointer to wait queue list
6194  *              ctx                             VFS context for operation
6195  *
6196  * Returns:     0                               Success
6197  *              !0                              Errno from select
6198  */
6199 int
6200 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6201 {
6202         return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
6203 }
6204
6205 int
6206 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
6207 {
6208 #pragma unused(fp, which, wql, ctx)
6209         return ENOTSUP;
6210 }
6211
6212
6213 /*
6214  * fo_close
6215  *
6216  * Description: Generic fileops close indirected through the fileops pointer
6217  *              in the fileproc structure
6218  *
6219  * Parameters:  fp                              fileproc structure pointer for
6220  *                                              file to close
6221  *              ctx                             VFS context for operation
6222  *
6223  * Returns:     0                               Success
6224  *              !0                              Errno from close
6225  */
6226 int
6227 fo_close(struct fileglob *fg, vfs_context_t ctx)
6228 {
6229         return (*fg->fg_ops->fo_close)(fg, ctx);
6230 }
6231
6232
6233 /*
6234  * fo_drain
6235  *
6236  * Description: Generic fileops kqueue filter indirected through the fileops
6237  *              pointer in the fileproc structure
6238  *
6239  * Parameters:  fp                              fileproc structure pointer
6240  *              ctx                             VFS context for operation
6241  *
6242  * Returns:     0                               Success
6243  *              !0                              errno from drain
6244  */
6245 int
6246 fo_drain(struct fileproc *fp, vfs_context_t ctx)
6247 {
6248         return (*fp->f_ops->fo_drain)(fp, ctx);
6249 }
6250
6251 int
6252 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
6253 {
6254 #pragma unused(fp, ctx)
6255         return ENOTSUP;
6256 }
6257
6258
6259 /*
6260  * fo_kqfilter
6261  *
6262  * Description: Generic fileops kqueue filter indirected through the fileops
6263  *              pointer in the fileproc structure
6264  *
6265  * Parameters:  fp                              fileproc structure pointer
6266  *              kn                              pointer to knote to filter on
6267  *
6268  * Returns:     (kn->kn_flags & EV_ERROR)       error in kn->kn_data
6269  *              0                               Filter is not active
6270  *              !0                              Filter is active
6271  */
6272 int
6273 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6274 {
6275         return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
6276 }
6277
/*
 * fo_no_kqfilter
 *
 * Description: Kqueue filter handler that records ENOTSUP on the knote via
 *              knote_set_error() and ignores its other arguments.
 *
 * Returns:     0                               Always
 */
int
fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
{
        (void)fp;
        (void)kev;

        knote_set_error(kn, ENOTSUP);

        return 0;
}
6285
6286
6287 /*
6288  * The ability to send a file descriptor to another
6289  * process is opt-in by file type.
6290  */
6291 boolean_t
6292 file_issendable(proc_t p, struct fileproc *fp)
6293 {
6294         proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
6295
6296         switch (fp->f_type) {
6297         case DTYPE_VNODE:
6298         case DTYPE_SOCKET:
6299         case DTYPE_PIPE:
6300         case DTYPE_PSXSHM:
6301         case DTYPE_NETPOLICY:
6302                 return 0 == (fp->f_fglob->fg_lflags & FG_CONFINED);
6303         default:
6304                 /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSEM */
6305                 return FALSE;
6306         }
6307 }
6308
6309 os_refgrp_decl(, f_iocount_refgrp, "f_iocount", NULL);
6310
6311 struct fileproc *
6312 fileproc_alloc_init(__unused void *arg)
6313 {
6314         struct fileproc *fp;
6315
6316         MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK);
6317         if (fp) {
6318                 bzero(fp, sizeof(*fp));
6319                 os_ref_init(&fp->f_iocount, &f_iocount_refgrp);
6320         }
6321
6322         return fp;
6323 }
6324
6325
6326 void
6327 fileproc_free(struct fileproc *fp)
6328 {
6329         os_ref_count_t __unused refc = os_ref_release(&fp->f_iocount);
6330 #if DEVELOPMENT || DEBUG
6331         if (0 != refc) {
6332                 panic("%s: pid %d refc: %u != 0",
6333                     __func__, proc_pid(current_proc()), refc);
6334         }
6335 #endif
6336         switch (FILEPROC_TYPE(fp)) {
6337         case FTYPE_SIMPLE:
6338                 FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
6339                 break;
6340         case FTYPE_GUARDED:
6341                 guarded_fileproc_free(fp);
6342                 break;
6343         default:
6344                 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags);
6345         }
6346 }
6347
6348 void
6349 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
6350 {
6351         if (clearflags) {
6352                 os_atomic_andnot(&fp->f_vflags, vflags, relaxed);
6353         } else {
6354                 os_atomic_or(&fp->f_vflags, vflags, relaxed);
6355         }
6356 }
6357
6358 fileproc_vflags_t
6359 fileproc_get_vflags(struct fileproc *fp)
6360 {
6361         return os_atomic_load(&fp->f_vflags, relaxed);
6362 }