]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_descrip.c
xnu-7195.81.3.tar.gz
[apple/xnu.git] / bsd / kern / kern_descrip.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102 #include <sys/mount_internal.h>
103 #include <sys/kdebug.h>
104 #include <sys/sysproto.h>
105 #include <sys/pipe.h>
106 #include <sys/spawn.h>
107 #include <sys/cprotect.h>
108 #include <sys/ubc_internal.h>
109
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/waitq.h>
113 #include <kern/ipc_misc.h>
114
115 #include <vm/vm_protos.h>
116 #include <mach/mach_port.h>
117
118 #include <security/audit/audit.h>
119 #if CONFIG_MACF
120 #include <security/mac_framework.h>
121 #endif
122
123 #include <stdbool.h>
124 #include <os/atomic_private.h>
125 #include <IOKit/IOBSD.h>
126
#define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
/*
 * Mach IPC entry points used by fileport support; declared locally to
 * avoid pulling in osfmk headers.
 */
kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
    mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
void ipc_port_release_send(ipc_port_t);

/* Helpers defined later in this file */
static void fileproc_drain(proc_t, struct fileproc *);
static int finishdup(proc_t p,
    struct filedesc *fdp, int old, int new, int flags, int32_t *retval);

void fileport_releasefg(struct fileglob *fg);

/* flags for fp_close_and_unlock */
#define FD_DUP2RESV 1 /* keep the target fd slot reserved after the close (dup2 path) */

/* We don't want these exported */

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

static void fdrelse(struct proc * p, int fd);

extern void file_lock_init(void);

extern kauth_scope_t kauth_scope_fileop;

/* Conflict wait queue for when selects collide (opaque type) */
extern struct waitq select_conflict_queue;

/* fsctl/ioctl selectors needed by fcntl handling, when not already defined */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
#endif

/* Shorthand accessors: fileproc fields that actually live on the shared fileglob */
#define f_flag fp_glob->fg_flag
#define f_type fp_glob->fg_ops->fo_type
#define f_cred fp_glob->fg_cred
#define f_ops fp_glob->fg_ops
#define f_offset fp_glob->fg_offset
#define f_data fp_glob->fg_data
/* Evaluates to 1 when the signed 64-bit sum (x) + (y) would overflow or underflow */
#define CHECK_ADD_OVERFLOW_INT64L(x, y) \
	(((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
	(((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
	? 1 : 0)

/* Allocation zones for the three core descriptor-table structures */
ZONE_DECLARE(fg_zone, "fileglob",
    sizeof(struct fileglob), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(fp_zone, "fileproc",
    sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
ZONE_DECLARE(fdp_zone, "filedesc",
    sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);

/*
 * Descriptor management.
 */
int nfiles; /* actual number of open files */
/*
 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
 */
static const struct fileops uninitops;

os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
/* lock group/attr for per-fileglob fg_lock; set up in file_lock_init() */
lck_grp_attr_t * file_lck_grp_attr;
lck_grp_t * file_lck_grp;
lck_attr_t * file_lck_attr;
198
199 #pragma mark fileglobs
200
/*!
 * @function fg_free
 *
 * @brief
 * Free a file structure.
 *
 * @discussion
 * Tears down a fileglob whose last reference has been dropped:
 * decrements the global open-file count, releases the vnode-specific
 * cache data and the credential stashed at open time, destroys the
 * embedded mutex and (under MACF) the label, and returns the memory
 * to fg_zone.
 */
static void
fg_free(struct fileglob *fg)
{
	os_atomic_dec(&nfiles, relaxed);

	/* per-vnode auxiliary data, if this fileglob had any */
	if (fg->fg_vn_data) {
		fg_vn_data_free(fg->fg_vn_data);
		fg->fg_vn_data = NULL;
	}

	/* drop the credential reference captured when the file was opened */
	if (IS_VALID_CRED(fg->fg_cred)) {
		kauth_cred_unref(&fg->fg_cred);
	}
	lck_mtx_destroy(&fg->fg_lock, file_lck_grp);

#if CONFIG_MACF
	mac_file_label_destroy(fg);
#endif
	zfree(fg_zone, fg);
}
227
/*
 * fg_ref
 *
 * Take an additional reference on a fileglob on behalf of process p.
 * The caller must hold the proc_fdlock of p; this is asserted on
 * DEBUG/DEVELOPMENT kernels and p is otherwise unused.
 */
OS_ALWAYS_INLINE
void
fg_ref(proc_t p, struct fileglob *fg)
{
#if DEBUG || DEVELOPMENT
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#else
	(void)p;
#endif
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
}
239
/*
 * fg_drop_live
 *
 * Drop a fileglob reference that is known not to be the last one
 * (os_ref_release_live_raw enforces that the count stays above zero).
 */
void
fg_drop_live(struct fileglob *fg)
{
	os_ref_release_live_raw(&fg->fg_count, &f_refgrp);
}
245
/*
 * fg_drop
 *
 * Description: Drop a reference on a fileglob.  If this was the last
 *		reference, the file is closed via fo_close() and the
 *		fileglob is freed.  Per POSIX close() semantics, any
 *		POSIX-style advisory locks held by process p on a
 *		vnode-backed file are released first.
 *
 * Parameters:	p		Process on whose behalf the reference is
 *				dropped; may be NULL (lock release is then
 *				skipped — presumably the fileport case,
 *				confirm with callers)
 *		fg		fileglob to release; NULL is a no-op
 *
 * Returns:	0		Success
 *		???		Error from vnode_getwithref() or fo_close()
 */
int
fg_drop(proc_t p, struct fileglob *fg)
{
	struct vnode *vp;
	struct vfs_context context;
	int error = 0;

	if (fg == NULL) {
		return 0;
	}

	/* Set up context with cred stashed in fg */
	if (p == current_proc()) {
		context.vc_thread = current_thread();
	} else {
		context.vc_thread = NULL;
	}
	context.vc_ucred = fg->fg_cred;

	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
	    (p->p_ladvflag & P_LADVLOCK)) {
		struct flock lf = {
			.l_whence = SEEK_SET,
			.l_type = F_UNLCK,
		};

		vp = (struct vnode *)fg->fg_data;
		/* best effort: the unlock result is deliberately ignored */
		if ((error = vnode_getwithref(vp)) == 0) {
			(void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
			(void)vnode_put(vp);
		}
	}

	if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
		/*
		 * Since we ensure that fg->fg_ops is always initialized,
		 * it is safe to invoke fo_close on the fg
		 */
		error = fo_close(fg, &context);

		fg_free(fg);
	}

	return error;
}
299
300 /*
301 * fg_get_vnode
302 *
303 * Description: Return vnode associated with the file structure, if
304 * any. The lifetime of the returned vnode is bound to
305 * the lifetime of the file structure.
306 *
307 * Parameters: fg Pointer to fileglob to
308 * inspect
309 *
310 * Returns: vnode_t
311 */
312 vnode_t
313 fg_get_vnode(struct fileglob *fg)
314 {
315 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
316 return (vnode_t)fg->fg_data;
317 } else {
318 return NULL;
319 }
320 }
321
322 bool
323 fg_sendable(struct fileglob *fg)
324 {
325 switch (FILEGLOB_DTYPE(fg)) {
326 case DTYPE_VNODE:
327 case DTYPE_SOCKET:
328 case DTYPE_PIPE:
329 case DTYPE_PSXSHM:
330 case DTYPE_NETPOLICY:
331 return (fg->fg_lflags & FG_CONFINED) == 0;
332
333 default:
334 return false;
335 }
336 }
337
338
339 #pragma mark fileprocs
340
/* True when the signed 64-bit sum x + y would overflow or underflow. */
static bool
seek_add_overflows(off_t x, off_t y)
{
	return (x > 0 && y > 0 && x > LLONG_MAX - y) ||
	       (x < 0 && y < 0 && x < LLONG_MIN - y);
}

/*
 * check_file_seek_range
 *
 * Description: Checks that the byte range described by a record-lock
 *		request stays within [0, LLONG_MAX].
 *
 * Parameters:	fl			Flock structure.
 *		cur_file_offset		Current offset in the file (used to
 *					resolve SEEK_CUR requests).
 *
 * Returns:	0		Success.
 *		EOVERFLOW	Start or end marker exceeds LLONG_MAX.
 *		EINVAL		Start or end marker resolves below zero.
 */
static int
check_file_seek_range(struct flock *fl, off_t cur_file_offset)
{
	if (fl->l_whence == SEEK_CUR) {
		/* Would the resolved start marker exceed LLONG_MAX? */
		if (seek_add_overflows(fl->l_start, cur_file_offset)) {
			/* A negative l_start means underflow instead */
			return fl->l_start < 0 ? EINVAL : EOVERFLOW;
		}
		off_t start = fl->l_start + cur_file_offset;
		/* Resolved start marker must not be negative. */
		if (start < 0) {
			return EINVAL;
		}
		/* End marker (start + len - 1) must not exceed LLONG_MAX... */
		if (fl->l_len > 0 && seek_add_overflows(start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* ...and must not be negative for zero/negative lengths. */
		if (fl->l_len <= 0 && start + fl->l_len < 0) {
			return EINVAL;
		}
	} else if (fl->l_whence == SEEK_SET) {
		/* Absolute start marker must not be negative. */
		if (fl->l_start < 0) {
			return EINVAL;
		}
		/* End marker must not exceed LLONG_MAX. */
		if (fl->l_len > 0 &&
		    seek_add_overflows(fl->l_start, fl->l_len - 1)) {
			return EOVERFLOW;
		}
		/* End marker must not be negative. */
		if (fl->l_len < 0 && fl->l_start + fl->l_len < 0) {
			return EINVAL;
		}
	}
	return 0;
}
397
398
/*
 * file_lock_init
 *
 * Description: Allocate the lock group and lock attribute used for the
 *		per-fileglob fg_lock (file_lck_grp / file_lck_attr).
 *
 * Parameters:	(void)
 *
 * Returns:	void
 *
 * Notes:	Called at system startup from bsd_init().
 */
void
file_lock_init(void)
{
	/* allocate file lock group attribute and group */
	file_lck_grp_attr = lck_grp_attr_alloc_init();

	file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);

	/* Allocate file lock attribute */
	file_lck_attr = lck_attr_alloc_init();
}
421
422
/*
 * proc_dirs_lock_shared / proc_dirs_unlock_shared
 * proc_dirs_lock_exclusive / proc_dirs_unlock_exclusive
 *
 * Convenience wrappers around the per-process p_dirs_lock
 * reader/writer lock: shared mode for readers, exclusive mode for
 * writers of the state it guards.
 */
void
proc_dirs_lock_shared(proc_t p)
{
	lck_rw_lock_shared(&p->p_dirs_lock);
}

void
proc_dirs_unlock_shared(proc_t p)
{
	lck_rw_unlock_shared(&p->p_dirs_lock);
}

void
proc_dirs_lock_exclusive(proc_t p)
{
	lck_rw_lock_exclusive(&p->p_dirs_lock);
}

void
proc_dirs_unlock_exclusive(proc_t p)
{
	lck_rw_unlock_exclusive(&p->p_dirs_lock);
}
446
/*
 * proc_fdlock, proc_fdlock_spin
 *
 * Description: Lock to control access to the per process struct fileproc
 *		and struct filedesc
 *
 * Parameters:	p				Process to take the lock on
 *
 * Returns:	void
 *
 * Notes:	The lock is initialized in forkproc() and destroyed in
 *		reap_child_process().  The _spin variant acquires the
 *		mutex in spin mode; proc_fdlock_assert() forwards the
 *		given LCK_MTX_ASSERT_* flags to lck_mtx_assert().
 */
void
proc_fdlock(proc_t p)
{
	lck_mtx_lock(&p->p_fdmlock);
}

void
proc_fdlock_spin(proc_t p)
{
	lck_mtx_lock_spin(&p->p_fdmlock);
}

void
proc_fdlock_assert(proc_t p, int assertflags)
{
	lck_mtx_assert(&p->p_fdmlock, assertflags);
}
477
478
/*
 * proc_fdunlock
 *
 * Description: Unlock the lock previously locked by a call to proc_fdlock()
 *		or proc_fdlock_spin()
 *
 * Parameters:	p				Process to drop the lock on
 *
 * Returns:	void
 */
void
proc_fdunlock(proc_t p)
{
	lck_mtx_unlock(&p->p_fdmlock);
}
493
494 struct fdt_iterator
495 fdt_next(proc_t p, int fd, bool only_settled)
496 {
497 struct fdt_iterator it;
498 struct filedesc *fdp = p->p_fd;
499 struct fileproc *fp;
500 int nfds = min(fdp->fd_lastfile + 1, fdp->fd_nfiles);
501
502 while (++fd < nfds) {
503 fp = fdp->fd_ofiles[fd];
504 if (fp == NULL || fp->fp_glob == NULL) {
505 continue;
506 }
507 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
508 continue;
509 }
510 it.fdti_fd = fd;
511 it.fdti_fp = fp;
512 return it;
513 }
514
515 it.fdti_fd = nfds;
516 it.fdti_fp = NULL;
517 return it;
518 }
519
520 struct fdt_iterator
521 fdt_prev(proc_t p, int fd, bool only_settled)
522 {
523 struct fdt_iterator it;
524 struct filedesc *fdp = p->p_fd;
525 struct fileproc *fp;
526
527 while (--fd >= 0) {
528 fp = fdp->fd_ofiles[fd];
529 if (fp == NULL || fp->fp_glob == NULL) {
530 continue;
531 }
532 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
533 continue;
534 }
535 it.fdti_fd = fd;
536 it.fdti_fp = fp;
537 return it;
538 }
539
540 it.fdti_fd = -1;
541 it.fdti_fp = NULL;
542 return it;
543 }
544
545 /*
546 * System calls on descriptors.
547 */
548
549
/*
 * sys_getdtablesize
 *
 * Description: Returns the per process maximum size of the descriptor table
 *
 * Parameters:	p				Process being queried
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *
 * Implicit returns:
 *		*retval (modified)		Size of dtable: the smaller of
 *						the RLIMIT_NOFILE limit and the
 *						system-wide maxfilesperproc cap
 */
int
sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
{
	*retval = (int32_t)MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc);

	return 0;
}
570
571
/*
 * procfdtbl_reservefd
 *
 * Mark descriptor slot fd as reserved: it holds no fileproc yet, but the
 * UF_RESERVED flag keeps it from being handed out again.  Caller is
 * expected to hold the proc_fdlock.
 */
static void
procfdtbl_reservefd(struct proc * p, int fd)
{
	p->p_fd->fd_ofiles[fd] = NULL;
	p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
}
578
579 void
580 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
581 {
582 if (fp != NULL) {
583 p->p_fd->fd_ofiles[fd] = fp;
584 }
585 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
586 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
587 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
588 wakeup(&p->p_fd);
589 }
590 }
591
/*
 * procfdtbl_waitfd
 *
 * Sleep until a reserved descriptor slot settles.  Sets UF_RESVWAIT so
 * procfdtbl_releasefd()/procfdtbl_clearfd() know to issue a wakeup; the
 * proc_fdlock is dropped for the duration of the msleep and reacquired
 * before returning.
 */
static void
procfdtbl_waitfd(struct proc * p, int fd)
{
	p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
	msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
}
598
599 static void
600 procfdtbl_clearfd(struct proc * p, int fd)
601 {
602 int waiting;
603
604 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
605 p->p_fd->fd_ofiles[fd] = NULL;
606 p->p_fd->fd_ofileflags[fd] = 0;
607 if (waiting == UF_RESVWAIT) {
608 wakeup(&p->p_fd);
609 }
610 }
611
/*
 * fdrelse
 *
 * Description: Utility function to free an fd in a process's descriptor
 *		table, updating the fd_freefile hint and shrinking
 *		fd_lastfile past trailing empty slots
 *
 * Parameters:	p		Process whose descriptor table holds fd
 *		fd		fd to free
 *
 * Returns:	void
 *
 * Locks:	Assumes proc_fdlock for process p is held by the caller
 */
static void
fdrelse(struct proc * p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	int nfd = 0;

	/* keep fd_freefile pointing at the lowest free slot for fdalloc */
	if (fd < fdp->fd_freefile) {
		fdp->fd_freefile = fd;
	}
#if DIAGNOSTIC
	if (fd > fdp->fd_lastfile) {
		panic("fdrelse: fd_lastfile inconsistent");
	}
#endif
	procfdtbl_clearfd(p, fd);

	/* shrink fd_lastfile past any trailing run of empty, unreserved slots */
	while ((nfd = fdp->fd_lastfile) > 0 &&
	    fdp->fd_ofiles[nfd] == NULL &&
	    !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
		/* JMM - What about files with lingering EV_VANISHED knotes? */
		fdp->fd_lastfile--;
	}
}
649
650
/*
 * fd_rdwr
 *
 * Description: Kernel-internal read or write through a descriptor of the
 *		current process.  Only vnode, pipe and socket descriptors
 *		are accepted, and the fd must have been opened with the
 *		mode (FREAD/FWRITE) matching the requested direction.
 *
 * Parameters:	fd		Descriptor to read from / write to
 *		rw		UIO_READ or UIO_WRITE
 *		base		Buffer address (user or kernel per segflg)
 *		len		Number of bytes to transfer
 *		segflg		Address space that base refers to
 *		offset		Explicit file offset; ignored when io_flg
 *				contains IO_APPEND (FOF_OFFSET not set)
 *		io_flg		IO_* flags; only IO_APPEND is examined here
 *		aresid		If non-NULL, receives the residual byte
 *				count; if NULL, a short transfer with no
 *				other error is reported as EIO
 *
 * Returns:	0		Success
 *		EINVAL		Unsupported descriptor type
 *		EBADF		fd not open for the requested direction
 *		fp_lookup:EBADF
 *		fo_read/fo_write:???
 */
int
fd_rdwr(
	int fd,
	enum uio_rw rw,
	uint64_t base,
	int64_t len,
	enum uio_seg segflg,
	off_t offset,
	int io_flg,
	int64_t *aresid)
{
	struct fileproc *fp;
	proc_t p;
	int error = 0;
	int flags = 0;
	int spacetype;
	uio_t auio = NULL;
	char uio_buf[UIO_SIZEOF(1)];
	struct vfs_context context = *(vfs_context_current());

	p = current_proc();

	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		return error;
	}

	/* only these descriptor types support kernel-internal I/O here */
	switch (FILEGLOB_DTYPE(fp->fp_glob)) {
	case DTYPE_VNODE:
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		break;
	default:
		error = EINVAL;
		goto out;
	}
	if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
		error = EBADF;
		goto out;
	}

	if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
		error = EBADF;
		goto out;
	}

	/* perform the I/O with the credential stashed at open time */
	context.vc_ucred = fp->fp_glob->fg_cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	} else {
		spacetype = UIO_SYSSPACE;
	}

	/* single-iovec uio built in the on-stack buffer; no allocation */
	auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));

	uio_addiov(auio, (user_addr_t)base, (user_size_t)len);

	/* without IO_APPEND, use the caller-supplied offset (FOF_OFFSET) */
	if (!(io_flg & IO_APPEND)) {
		flags = FOF_OFFSET;
	}

	if (rw == UIO_WRITE) {
		user_ssize_t orig_resid = uio_resid(auio);
		error = fo_write(fp, auio, flags, &context);
		/* record that at least one byte was written to this file */
		if (uio_resid(auio) < orig_resid) {
			os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
		}
	} else {
		error = fo_read(fp, auio, flags, &context);
	}

	if (aresid) {
		*aresid = uio_resid(auio);
	} else if (uio_resid(auio) && error == 0) {
		/* caller demanded a full transfer but didn't ask for the residual */
		error = EIO;
	}
out:
	fp_drop(p, fd, fp, 0);
	return error;
}
732
733
734
/*
 * sys_dup
 *
 * Description: Duplicate a file descriptor onto the lowest available
 *		descriptor number.
 *
 * Parameters:	p				Process performing the dup
 *		uap->fd				The fd to dup
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int old = uap->fd;
	int new, error;
	struct fileproc *fp;

	proc_fdlock(p);
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* duplicating a dup-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	if ((error = fdalloc(p, 0, &new))) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	/* energy accounting trace for duplicated sockets, outside the fdlock */
	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
		    new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
	}

	return error;
}
785
/*
 * sys_dup2
 *
 * Description: Duplicate a file descriptor to a particular value.  Thin
 *		syscall wrapper around dup2() below.
 *
 * Parameters:	p				Process performing the dup
 *		uap->from			The fd to dup
 *		uap->to				The fd to dup it to
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno
 *
 * Implicit returns:
 *		*retval (modified)		The new descriptor
 */
int
sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
{
	return dup2(p, uap->from, uap->to, retval);
}
807
/*
 * dup2
 *
 * Description: Duplicate descriptor `old` onto descriptor number `new`,
 *		first closing whatever `new` referred to.  If the target
 *		slot is mid-transition (UF_RESERVED), the whole operation
 *		restarts after waiting for the slot to settle.
 *
 * Parameters:	p		Process performing the dup
 *		old		Source descriptor
 *		new		Desired destination descriptor number
 *		retval		Out: the new descriptor (== new) on success
 *
 * Returns:	0		Success
 *		EBADF		new is negative or beyond the fd limits
 *		!0		Errno from fp_lookup/fdalloc/finishdup or a
 *				guard exception
 *
 * Locks:	Takes and drops the proc_fdlock; fp_close_and_unlock()
 *		transiently drops it while closing the displaced file.
 */
int
dup2(proc_t p, int old, int new, int *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc *fp, *nfp;
	int i, error;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	proc_fdlock(p);

startover:
	if ((error = fp_lookup(p, old, &fp, 1))) {
		proc_fdunlock(p);
		return error;
	}
	/* duplicating a dup-guarded fd raises a guard exception instead */
	if (fp_isguarded(fp, GUARD_DUP)) {
		error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
		(void) fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return error;
	}
	if (new < 0 ||
	    (rlim_t)new >= nofile ||
	    new >= maxfilesperproc) {
		fp_drop(p, old, fp, 1);
		proc_fdunlock(p);
		return EBADF;
	}
	/* dup2 onto itself is a no-op that still validates old */
	if (old == new) {
		fp_drop(p, old, fp, 1);
		*retval = new;
		proc_fdunlock(p);
		return 0;
	}
	if (new < 0 || new >= fdp->fd_nfiles) {
		/* target beyond current table: grow/allocate, then reserve new */
		if ((error = fdalloc(p, new, &i))) {
			fp_drop(p, old, fp, 1);
			proc_fdunlock(p);
			return error;
		}
		if (new != i) {
			/* fdalloc returned some other slot; release it and
			 * fall into the close/reserve path for `new` */
			fdrelse(p, i);
			goto closeit;
		}
	} else {
closeit:
		/* someone else is mid-open/dup2 on this slot: wait and retry */
		if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
			fp_drop(p, old, fp, 1);
			procfdtbl_waitfd(p, new);
#if DIAGNOSTIC
			proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
			goto startover;
		}

		if ((nfp = fdp->fd_ofiles[new]) != NULL) {
			/* closing a close-guarded fd raises a guard exception */
			if (fp_isguarded(nfp, GUARD_CLOSE)) {
				fp_drop(p, old, fp, 1);
				error = fp_guard_exception(p,
				    new, nfp, kGUARD_EXC_CLOSE);
				proc_fdunlock(p);
				return error;
			}
			/* FD_DUP2RESV keeps the slot reserved across the close;
			 * this drops and retakes the fdlock */
			(void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
			proc_fdlock(p);
			assert(fdp->fd_ofileflags[new] & UF_RESERVED);
		} else {
#if DIAGNOSTIC
			if (fdp->fd_ofiles[new] != NULL) {
				panic("dup2: no ref on fileproc %d", new);
			}
#endif
			procfdtbl_reservefd(p, new);
		}
	}
#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("dup2: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("dup2: unreserved fileflags with new %d", new);
	}
#endif
	error = finishdup(p, fdp, old, new, 0, retval);
	fp_drop(p, old, fp, 1);
	proc_fdunlock(p);

	return error;
}
897
898
/*
 * fcntl
 *
 * Description: The file control system call.
 *
 * Parameters:	p				Process performing the fcntl
 *		uap->fd				The fd to operate against
 *		uap->cmd			The command to perform
 *		uap->arg			Pointer to the command argument
 *		retval				Pointer to the call return area
 *
 * Returns:	0				Success
 *		!0				Errno (see fcntl_nocancel)
 *
 * Implicit returns:
 *		*retval (modified)		fcntl return value (if any)
 *
 * Notes:	This system call differs from fcntl_nocancel() in that it
 *		tests for cancellation prior to performing a potentially
 *		blocking operation.
 */
int
sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
{
	/* pthread cancellation point: may abort before doing any work */
	__pthread_testcancel(1);
	return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
}
926
/*
 * Entitlement string checked by fcntl handling.
 * NOTE(review): the consumer is in sys_fcntl_nocancel(), outside this
 * chunk — confirm usage there.
 */
#define ACCOUNT_OPENFROM_ENTITLEMENT \
	"com.apple.private.vfs.role-account-openfrom"
929
930 /*
931 * sys_fcntl_nocancel
932 *
933 * Description: A non-cancel-testing file control system call.
934 *
935 * Parameters: p Process performing the fcntl
936 * uap->fd The fd to operate against
937 * uap->cmd The command to perform
938 * uap->arg Pointer to the command argument
939 * retval Pointer to the call return area
940 *
941 * Returns: 0 Success
942 * EINVAL
943 * fp_lookup:EBADF Bad file descriptor
944 * [F_DUPFD]
945 * fdalloc:EMFILE
946 * fdalloc:ENOMEM
947 * finishdup:EBADF
948 * finishdup:ENOMEM
949 * [F_SETOWN]
950 * ESRCH
951 * [F_SETLK]
952 * EBADF
953 * EOVERFLOW
954 * copyin:EFAULT
955 * vnode_getwithref:???
956 * VNOP_ADVLOCK:???
957 * msleep:ETIMEDOUT
958 * [F_GETLK]
959 * EBADF
960 * EOVERFLOW
961 * copyin:EFAULT
962 * copyout:EFAULT
963 * vnode_getwithref:???
964 * VNOP_ADVLOCK:???
965 * [F_PREALLOCATE]
966 * EBADF
967 * EINVAL
968 * copyin:EFAULT
969 * copyout:EFAULT
970 * vnode_getwithref:???
971 * VNOP_ALLOCATE:???
972 * [F_SETSIZE,F_RDADVISE]
973 * EBADF
974 * EINVAL
975 * copyin:EFAULT
976 * vnode_getwithref:???
977 * [F_RDAHEAD,F_NOCACHE]
978 * EBADF
979 * vnode_getwithref:???
980 * [???]
981 *
982 * Implicit returns:
983 * *retval (modified) fcntl return value (if any)
984 */
985 int
986 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
987 {
988 int fd = uap->fd;
989 struct filedesc *fdp = p->p_fd;
990 struct fileproc *fp;
991 char *pop;
992 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
993 unsigned int oflags, nflags;
994 int i, tmp, error, error2, flg = 0;
995 struct flock fl = {};
996 struct flocktimeout fltimeout;
997 struct timespec *timeout = NULL;
998 struct vfs_context context;
999 off_t offset;
1000 int newmin;
1001 daddr64_t lbn, bn;
1002 unsigned int fflag;
1003 user_addr_t argp;
1004 boolean_t is64bit;
1005 rlim_t nofile;
1006 int has_entitlement = 0;
1007
1008 AUDIT_ARG(fd, uap->fd);
1009 AUDIT_ARG(cmd, uap->cmd);
1010
1011 nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
1012
1013 proc_fdlock(p);
1014 if ((error = fp_lookup(p, fd, &fp, 1))) {
1015 proc_fdunlock(p);
1016 return error;
1017 }
1018 context.vc_thread = current_thread();
1019 context.vc_ucred = fp->f_cred;
1020
1021 is64bit = proc_is64bit(p);
1022 if (is64bit) {
1023 argp = uap->arg;
1024 } else {
1025 /*
1026 * Since the arg parameter is defined as a long but may be
1027 * either a long or a pointer we must take care to handle
1028 * sign extension issues. Our sys call munger will sign
1029 * extend a long when we are called from a 32-bit process.
1030 * Since we can never have an address greater than 32-bits
1031 * from a 32-bit process we lop off the top 32-bits to avoid
1032 * getting the wrong address
1033 */
1034 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
1035 }
1036
1037 #if CONFIG_MACF
1038 error = mac_file_check_fcntl(proc_ucred(p), fp->fp_glob, uap->cmd,
1039 uap->arg);
1040 if (error) {
1041 goto out;
1042 }
1043 #endif
1044
1045 pop = &fdp->fd_ofileflags[fd];
1046
1047 switch (uap->cmd) {
1048 case F_DUPFD:
1049 case F_DUPFD_CLOEXEC:
1050 if (fp_isguarded(fp, GUARD_DUP)) {
1051 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
1052 goto out;
1053 }
1054 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1055 AUDIT_ARG(value32, newmin);
1056 if ((rlim_t)newmin >= nofile ||
1057 newmin >= maxfilesperproc) {
1058 error = EINVAL;
1059 goto out;
1060 }
1061 if ((error = fdalloc(p, newmin, &i))) {
1062 goto out;
1063 }
1064 error = finishdup(p, fdp, fd, i,
1065 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
1066 goto out;
1067
1068 case F_GETFD:
1069 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
1070 error = 0;
1071 goto out;
1072
1073 case F_SETFD:
1074 AUDIT_ARG(value32, (uint32_t)uap->arg);
1075 if (uap->arg & FD_CLOEXEC) {
1076 *pop |= UF_EXCLOSE;
1077 } else {
1078 if (fp_isguarded(fp, 0)) {
1079 error = fp_guard_exception(p,
1080 fd, fp, kGUARD_EXC_NOCLOEXEC);
1081 goto out;
1082 }
1083 *pop &= ~UF_EXCLOSE;
1084 }
1085 error = 0;
1086 goto out;
1087
1088 case F_GETFL:
1089 *retval = OFLAGS(fp->f_flag);
1090 error = 0;
1091 goto out;
1092
1093 case F_SETFL:
1094 // FIXME (rdar://54898652)
1095 //
1096 // this code is broken if fnctl(F_SETFL), ioctl() are
1097 // called concurrently for the same fileglob.
1098
1099 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1100 AUDIT_ARG(value32, tmp);
1101
1102 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
1103 nflags = oflags & ~FCNTLFLAGS;
1104 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
1105 });
1106 tmp = nflags & FNONBLOCK;
1107 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1108 if (error) {
1109 goto out;
1110 }
1111 tmp = nflags & FASYNC;
1112 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1113 if (!error) {
1114 goto out;
1115 }
1116 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1117 tmp = 0;
1118 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1119 goto out;
1120
1121 case F_GETOWN:
1122 if (fp->f_type == DTYPE_SOCKET) {
1123 *retval = ((struct socket *)fp->f_data)->so_pgid;
1124 error = 0;
1125 goto out;
1126 }
1127 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
1128 *retval = -*retval;
1129 goto out;
1130
/*
 * F_SETOWN: set the recipient of SIGIO/SIGURG for this descriptor.
 * Sockets store the pgid directly; pipes forward via TIOCSPGRP ioctl;
 * for anything else a positive arg is validated as a live pid and
 * translated to its process-group id before the ioctl is issued.
 */
1131 case F_SETOWN:
1132 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
1133 AUDIT_ARG(value32, tmp);
1134 if (fp->f_type == DTYPE_SOCKET) {
1135 ((struct socket *)fp->f_data)->so_pgid = tmp;
1136 error = 0;
1137 goto out;
1138 }
1139 if (fp->f_type == DTYPE_PIPE) {
1140 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1141 goto out;
1142 }
1143
/* non-positive arg already names a process group (negated); positive
 * arg is a pid — look it up (ESRCH if gone) and use its pgid instead */
1144 if (tmp <= 0) {
1145 tmp = -tmp;
1146 } else {
1147 proc_t p1 = proc_find(tmp);
1148 if (p1 == 0) {
1149 error = ESRCH;
1150 goto out;
1151 }
1152 tmp = (int)p1->p_pgrpid;
1153 proc_rele(p1);
1154 }
1155 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1156 goto out;
1157
/*
 * F_SETNOSIGPIPE: suppress (or re-enable) SIGPIPE delivery for writes
 * on this descriptor.  Sockets use SO_NOSIGPIPE; all other types flag
 * the shared fileglob under fg_lock.
 */
1158 case F_SETNOSIGPIPE:
1159 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
1160 if (fp->f_type == DTYPE_SOCKET) {
1161 #if SOCKETS
1162 error = sock_setsockopt((struct socket *)fp->f_data,
1163 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
1164 #else
1165 error = EINVAL;
1166 #endif
1167 } else {
1168 struct fileglob *fg = fp->fp_glob;
1169
1170 lck_mtx_lock_spin(&fg->fg_lock);
1171 if (tmp) {
1172 fg->fg_lflags |= FG_NOSIGPIPE;
1173 } else {
1174 fg->fg_lflags &= ~FG_NOSIGPIPE;
1175 }
1176 lck_mtx_unlock(&fg->fg_lock);
1177 error = 0;
1178 }
1179 goto out;
1180
/* F_GETNOSIGPIPE: report the current SIGPIPE-suppression state (1/0). */
1181 case F_GETNOSIGPIPE:
1182 if (fp->f_type == DTYPE_SOCKET) {
1183 #if SOCKETS
1184 int retsize = sizeof(*retval);
1185 error = sock_getsockopt((struct socket *)fp->f_data,
1186 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
1187 #else
1188 error = EINVAL;
1189 #endif
1190 } else {
1191 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
1192 1 : 0;
1193 error = 0;
1194 }
1195 goto out;
1196
1197 case F_SETCONFINED:
1198 /*
1199 * If this is the only reference to this fglob in the process
1200 * and it's already marked as close-on-fork then mark it as
1201 * (immutably) "confined" i.e. any fd that points to it will
1202 * forever be close-on-fork, and attempts to use an IPC
1203 * mechanism to move the descriptor elsewhere will fail.
1204 */
1205 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1206 struct fileglob *fg = fp->fp_glob;
1207
1208 lck_mtx_lock_spin(&fg->fg_lock);
1209 if (fg->fg_lflags & FG_CONFINED) {
1210 error = 0;
/* a dup (or cross-process reference) holds another fg_count
 * reference: refuse until the caller closes the duplicate */
1211 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
1212 error = EAGAIN; /* go close the dup .. */
1213 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1214 fg->fg_lflags |= FG_CONFINED;
1215 error = 0;
1216 } else {
1217 error = EBADF; /* open without O_CLOFORK? */
1218 }
1219 lck_mtx_unlock(&fg->fg_lock);
1220 } else {
1221 /*
1222 * Other subsystems may have built on the immutability
1223 * of FG_CONFINED; clearing it may be tricky.
1224 */
1225 error = EPERM; /* immutable */
1226 }
1227 goto out;
1228
/* F_GETCONFINED: report whether the fileglob carries FG_CONFINED. */
1229 case F_GETCONFINED:
1230 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
1231 error = 0;
1232 goto out;
1233
1233
/*
 * Advisory record locking.  The blocking variants fall through after
 * setting F_WAIT; OFD variants attach the lock to the open file
 * description (fp_glob) rather than the process, matching Linux
 * open-file-description lock semantics.
 */
1234 case F_SETLKWTIMEOUT:
1235 case F_SETLKW:
1236 case F_OFD_SETLKWTIMEOUT:
1237 case F_OFD_SETLKW:
1238 flg |= F_WAIT;
1239 OS_FALLTHROUGH;
1240
1241 case F_SETLK:
1242 case F_OFD_SETLK:
1243 if (fp->f_type != DTYPE_VNODE) {
1244 error = EBADF;
1245 goto out;
1246 }
1247 vp = (struct vnode *)fp->f_data;
1248
/* snapshot flags/offset before dropping the fd lock; the copyin
 * below may sleep and must not run under proc_fdlock */
1249 fflag = fp->f_flag;
1250 offset = fp->f_offset;
1251 proc_fdunlock(p);
1252
1253 /* Copy in the lock structure */
1254 if (F_SETLKWTIMEOUT == uap->cmd ||
1255 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1256 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1257 if (error) {
1258 goto outdrop;
1259 }
1260 fl = fltimeout.fl;
1261 timeout = &fltimeout.timeout;
1262 } else {
1263 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1264 if (error) {
1265 goto outdrop;
1266 }
1267 }
1268
1269 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1270 /* and ending byte for EOVERFLOW in SEEK_SET */
1271 error = check_file_seek_range(&fl, offset);
1272 if (error) {
1273 goto outdrop;
1274 }
1275
1276 if ((error = vnode_getwithref(vp))) {
1277 goto outdrop;
1278 }
/* SEEK_CUR is resolved here using the offset snapshot taken above */
1279 if (fl.l_whence == SEEK_CUR) {
1280 fl.l_start += offset;
1281 }
1282
1283 #if CONFIG_MACF
1284 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1285 F_SETLK, &fl);
1286 if (error) {
1287 (void)vnode_put(vp);
1288 goto outdrop;
1289 }
1290 #endif
1291 switch (uap->cmd) {
1292 case F_OFD_SETLK:
1293 case F_OFD_SETLKW:
1294 case F_OFD_SETLKWTIMEOUT:
/* OFD locks: owner id is the fileglob, so the lock survives fork/dup
 * and is released on last close of the description */
1295 flg |= F_OFD_LOCK;
1296 switch (fl.l_type) {
1297 case F_RDLCK:
1298 if ((fflag & FREAD) == 0) {
1299 error = EBADF;
1300 break;
1301 }
1302 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1303 F_SETLK, &fl, flg, &context, timeout);
1304 break;
1305 case F_WRLCK:
1306 if ((fflag & FWRITE) == 0) {
1307 error = EBADF;
1308 break;
1309 }
1310 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1311 F_SETLK, &fl, flg, &context, timeout);
1312 break;
1313 case F_UNLCK:
1314 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1315 F_UNLCK, &fl, F_OFD_LOCK, &context,
1316 timeout);
1317 break;
1318 default:
1319 error = EINVAL;
1320 break;
1321 }
1322 if (0 == error &&
1323 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1324 struct fileglob *fg = fp->fp_glob;
1325
1326 /*
1327 * arrange F_UNLCK on last close (once
1328 * set, FG_HAS_OFDLOCK is immutable)
1329 */
1330 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1331 lck_mtx_lock_spin(&fg->fg_lock);
1332 fg->fg_lflags |= FG_HAS_OFDLOCK;
1333 lck_mtx_unlock(&fg->fg_lock);
1334 }
1335 }
1336 break;
1337 default:
/* classic POSIX locks: owner id is the process itself */
1338 flg |= F_POSIX;
1339 switch (fl.l_type) {
1340 case F_RDLCK:
1341 if ((fflag & FREAD) == 0) {
1342 error = EBADF;
1343 break;
1344 }
1345 // XXX UInt32 unsafe for LP64 kernel
/* P_LADVLOCK marks the process as holding advisory locks so exit
 * paths know to clean them up */
1346 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1347 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1348 F_SETLK, &fl, flg, &context, timeout);
1349 break;
1350 case F_WRLCK:
1351 if ((fflag & FWRITE) == 0) {
1352 error = EBADF;
1353 break;
1354 }
1355 // XXX UInt32 unsafe for LP64 kernel
1356 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1357 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1358 F_SETLK, &fl, flg, &context, timeout);
1359 break;
1360 case F_UNLCK:
1361 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1362 F_UNLCK, &fl, F_POSIX, &context, timeout);
1363 break;
1364 default:
1365 error = EINVAL;
1366 break;
1367 }
1368 break;
1369 }
1370 (void) vnode_put(vp);
1371 goto outdrop;
1372
/*
 * F_GETLK family: query whether a described lock would block, filling
 * in the conflicting lock (or F_UNLCK) and copying the struct back out.
 */
1373 case F_GETLK:
1374 case F_OFD_GETLK:
1375 case F_GETLKPID:
1376 case F_OFD_GETLKPID:
1377 if (fp->f_type != DTYPE_VNODE) {
1378 error = EBADF;
1379 goto out;
1380 }
1381 vp = (struct vnode *)fp->f_data;
1382
1383 offset = fp->f_offset;
1384 proc_fdunlock(p);
1385
1386 /* Copy in the lock structure */
1387 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1388 if (error) {
1389 goto outdrop;
1390 }
1391
1392 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1393 /* and ending byte for EOVERFLOW in SEEK_SET */
1394 error = check_file_seek_range(&fl, offset);
1395 if (error) {
1396 goto outdrop;
1397 }
1398
1399 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1400 error = EINVAL;
1401 goto outdrop;
1402 }
1403
/* validate user-supplied type and whence before touching the vnode */
1404 switch (fl.l_type) {
1405 case F_RDLCK:
1406 case F_UNLCK:
1407 case F_WRLCK:
1408 break;
1409 default:
1410 error = EINVAL;
1411 goto outdrop;
1412 }
1413
1414 switch (fl.l_whence) {
1415 case SEEK_CUR:
1416 case SEEK_SET:
1417 case SEEK_END:
1418 break;
1419 default:
1420 error = EINVAL;
1421 goto outdrop;
1422 }
1423
1424 if ((error = vnode_getwithref(vp)) == 0) {
1425 if (fl.l_whence == SEEK_CUR) {
1426 fl.l_start += offset;
1427 }
1428
1429 #if CONFIG_MACF
1430 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1431 uap->cmd, &fl);
/* note: the MACF-gated "if" governs the switch below whether or not
 * CONFIG_MACF is compiled in */
1432 if (error == 0)
1433 #endif
1434 switch (uap->cmd) {
1435 case F_OFD_GETLK:
1436 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1437 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1438 break;
1439 case F_OFD_GETLKPID:
1440 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1441 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1442 break;
1443 default:
1444 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1445 uap->cmd, &fl, F_POSIX, &context, NULL);
1446 break;
1447 }
1448
1449 (void)vnode_put(vp);
1450
1451 if (error == 0) {
1452 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1453 }
1454 }
1455 goto outdrop;
1456
1456
/*
 * F_PREALLOCATE: ask the filesystem to reserve space for the file.
 * Copies in an fstore_t, translates its flags/posmode into VNOP_ALLOCATE
 * flags, and copies the structure (with fst_bytesalloc filled in) back
 * out even when the allocation itself failed.
 */
1457 case F_PREALLOCATE: {
1458 fstore_t alloc_struct; /* structure for allocate command */
1459 u_int32_t alloc_flags = 0;
1460
1461 if (fp->f_type != DTYPE_VNODE) {
1462 error = EBADF;
1463 goto out;
1464 }
1465
1466 vp = (struct vnode *)fp->f_data;
1467 proc_fdunlock(p);
1468
1469 /* make sure that we have write permission */
1470 if ((fp->f_flag & FWRITE) == 0) {
1471 error = EBADF;
1472 goto outdrop;
1473 }
1474
1475 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1476 if (error) {
1477 goto outdrop;
1478 }
1479
1480 /* now set the space allocated to 0 */
1481 alloc_struct.fst_bytesalloc = 0;
1482
1483 /*
1484 * Do some simple parameter checking
1485 */
1486
1487 /* set up the flags */
1488
1489 alloc_flags |= PREALLOCATE;
1490
1491 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1492 alloc_flags |= ALLOCATECONTIG;
1493 }
1494
1495 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1496 alloc_flags |= ALLOCATEALL;
1497 }
1498
1499 /*
1500 * Do any position mode specific stuff. The only
1501 * position mode supported now is PEOFPOSMODE
1502 */
1503
1504 switch (alloc_struct.fst_posmode) {
1505 case F_PEOFPOSMODE:
/* allocating from end-of-file: a nonzero offset is meaningless */
1506 if (alloc_struct.fst_offset != 0) {
1507 error = EINVAL;
1508 goto outdrop;
1509 }
1510
1511 alloc_flags |= ALLOCATEFROMPEOF;
1512 break;
1513
1514 case F_VOLPOSMODE:
1515 if (alloc_struct.fst_offset <= 0) {
1516 error = EINVAL;
1517 goto outdrop;
1518 }
1519
1520 alloc_flags |= ALLOCATEFROMVOL;
1521 break;
1522
1523 default: {
1524 error = EINVAL;
1525 goto outdrop;
1526 }
1527 }
1528 if ((error = vnode_getwithref(vp)) == 0) {
1529 /*
1530 * call allocate to get the space
1531 */
1532 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1533 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1534 &context);
1535 (void)vnode_put(vp);
1536
/* copy the result back unconditionally; only surface the copyout
 * error when the allocation itself succeeded */
1537 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1538
1539 if (error == 0) {
1540 error = error2;
1541 }
1542 }
1543 goto outdrop;
1544 }
/*
 * F_PUNCHHOLE: deallocate a byte range of the file.  Requires FWRITE;
 * the real work is delegated to the filesystem via VNOP_IOCTL.
 */
1545 case F_PUNCHHOLE: {
1546 fpunchhole_t args;
1547
1548 if (fp->f_type != DTYPE_VNODE) {
1549 error = EBADF;
1550 goto out;
1551 }
1552
1553 vp = (struct vnode *)fp->f_data;
1554 proc_fdunlock(p);
1555
1556 /* need write permissions */
1557 if ((fp->f_flag & FWRITE) == 0) {
1558 error = EPERM;
1559 goto outdrop;
1560 }
1561
1562 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1563 goto outdrop;
1564 }
1565
1566 if ((error = vnode_getwithref(vp))) {
1567 goto outdrop;
1568 }
1569
1570 #if CONFIG_MACF
1571 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
1572 (void)vnode_put(vp);
1573 goto outdrop;
1574 }
1575 #endif
1576
1577 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1578 (void)vnode_put(vp);
1579
1580 goto outdrop;
1581 }
/*
 * F_TRIM_ACTIVE_FILE: privileged trim of an open file's blocks; gated
 * on PRIV_TRIM_ACTIVE_FILE and FWRITE, then forwarded to the filesystem.
 */
1582 case F_TRIM_ACTIVE_FILE: {
1583 ftrimactivefile_t args;
1584
1585 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1586 error = EACCES;
1587 goto out;
1588 }
1589
1590 if (fp->f_type != DTYPE_VNODE) {
1591 error = EBADF;
1592 goto out;
1593 }
1594
1595 vp = (struct vnode *)fp->f_data;
1596 proc_fdunlock(p);
1597
1598 /* need write permissions */
1599 if ((fp->f_flag & FWRITE) == 0) {
1600 error = EPERM;
1601 goto outdrop;
1602 }
1603
1604 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1605 goto outdrop;
1606 }
1607
1608 if ((error = vnode_getwithref(vp))) {
1609 goto outdrop;
1610 }
1611
1612 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1613 (void)vnode_put(vp);
1614
1615 goto outdrop;
1616 }
/*
 * F_SPECULATIVE_READ: hint the filesystem to read a range ahead of
 * demand.  The offset is page-aligned down here and the length grown by
 * the same delta so the original range stays covered.
 */
1617 case F_SPECULATIVE_READ: {
1618 fspecread_t args;
1619
1620 if (fp->f_type != DTYPE_VNODE) {
1621 error = EBADF;
1622 goto out;
1623 }
1624
1625 vp = (struct vnode *)fp->f_data;
1626 proc_fdunlock(p);
1627
1628 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1629 goto outdrop;
1630 }
1631
1632 /* Discard invalid offsets or lengths */
1633 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1634 error = EINVAL;
1635 goto outdrop;
1636 }
1637
1638 /*
1639 * Round the file offset down to a page-size boundary (or to 0).
1640 * The filesystem will need to round the length up to the end of the page boundary
1641 * or to the EOF of the file.
1642 */
1643 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1644 uint64_t foff_delta = args.fsr_offset - foff;
1645 args.fsr_offset = (off_t) foff;
1646
1647 /*
1648 * Now add in the delta to the supplied length. Since we may have adjusted the
1649 * offset, increase it by the amount that we adjusted.
1650 */
1651 args.fsr_length += foff_delta;
1652
1653 if ((error = vnode_getwithref(vp))) {
1654 goto outdrop;
1655 }
1656 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1657 (void)vnode_put(vp);
1658
1659 goto outdrop;
1660 }
/*
 * F_SETSIZE: set the file's size without zero-filling newly exposed
 * data.  Root-only; PRIV_VFS_SETSIZE additionally selects whether the
 * resize may skip zerofill (IO_NOZEROFILL) or must zero the new range.
 */
1661 case F_SETSIZE:
1662 if (fp->f_type != DTYPE_VNODE) {
1663 error = EBADF;
1664 goto out;
1665 }
1666 vp = (struct vnode *)fp->f_data;
1667 proc_fdunlock(p);
1668
1669 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1670 if (error) {
1671 goto outdrop;
1672 }
1673 AUDIT_ARG(value64, offset);
1674
1675 error = vnode_getwithref(vp);
1676 if (error) {
1677 goto outdrop;
1678 }
1679
1680 #if CONFIG_MACF
1681 error = mac_vnode_check_truncate(&context,
1682 fp->fp_glob->fg_cred, vp);
1683 if (error) {
1684 (void)vnode_put(vp);
1685 goto outdrop;
1686 }
1687 #endif
1688 /*
1689 * Make sure that we are root. Growing a file
1690 * without zero filling the data is a security hole.
1691 */
1692 if (!kauth_cred_issuser(kauth_cred_get())) {
1693 error = EACCES;
1694 } else {
1695 /*
1696 * Require privilege to change file size without zerofill,
1697 * else will change the file size and zerofill it.
1698 */
1699 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
1700 if (error == 0) {
1701 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
1702 } else {
1703 error = vnode_setsize(vp, offset, 0, &context);
1704 }
1705
1706 #if CONFIG_MACF
1707 if (error == 0) {
1708 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
1709 }
1710 #endif
1711 }
1712
1713 (void)vnode_put(vp);
1714 goto outdrop;
1715
/*
 * F_RDAHEAD / F_NOCACHE / F_NODIRECT / F_SINGLE_WRITER: toggle per-
 * description I/O hint flags on fg_flag with atomic ops (no fd lock
 * needed).  Note F_RDAHEAD is inverted: arg!=0 clears FNORDAHEAD.
 */
1716 case F_RDAHEAD:
1717 if (fp->f_type != DTYPE_VNODE) {
1718 error = EBADF;
1719 goto out;
1720 }
1721 if (uap->arg) {
1722 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1723 } else {
1724 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1725 }
1726 goto out;
1727
1728 case F_NOCACHE:
1729 if (fp->f_type != DTYPE_VNODE) {
1730 error = EBADF;
1731 goto out;
1732 }
1733 if (uap->arg) {
1734 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1735 } else {
1736 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1737 }
1738 goto out;
1739
1740 case F_NODIRECT:
1741 if (fp->f_type != DTYPE_VNODE) {
1742 error = EBADF;
1743 goto out;
1744 }
1745 if (uap->arg) {
1746 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1747 } else {
1748 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1749 }
1750 goto out;
1751
1752 case F_SINGLE_WRITER:
1753 if (fp->f_type != DTYPE_VNODE) {
1754 error = EBADF;
1755 goto out;
1756 }
1757 if (uap->arg) {
1758 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1759 } else {
1760 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1761 }
1762 goto out;
1763
/*
 * F_GLOBAL_NOCACHE: set/clear the vnode-wide (all descriptions) no-
 * cache hint; returns the previous state in *retval.
 */
1764 case F_GLOBAL_NOCACHE:
1765 if (fp->f_type != DTYPE_VNODE) {
1766 error = EBADF;
1767 goto out;
1768 }
1769 vp = (struct vnode *)fp->f_data;
1770 proc_fdunlock(p);
1771
1772 if ((error = vnode_getwithref(vp)) == 0) {
1773 *retval = vnode_isnocache(vp);
1774
1775 if (uap->arg) {
1776 vnode_setnocache(vp);
1777 } else {
1778 vnode_clearnocache(vp);
1779 }
1780
1781 (void)vnode_put(vp);
1782 }
1783 goto outdrop;
1784
/*
 * F_CHECK_OPENEVT: set/clear the vnode's open-event flag; returns the
 * previous state in *retval.
 */
1785 case F_CHECK_OPENEVT:
1786 if (fp->f_type != DTYPE_VNODE) {
1787 error = EBADF;
1788 goto out;
1789 }
1790 vp = (struct vnode *)fp->f_data;
1791 proc_fdunlock(p);
1792
1793 if ((error = vnode_getwithref(vp)) == 0) {
1794 *retval = vnode_is_openevt(vp);
1795
1796 if (uap->arg) {
1797 vnode_set_openevt(vp);
1798 } else {
1799 vnode_clear_openevt(vp);
1800 }
1801
1802 (void)vnode_put(vp);
1803 }
1804 goto outdrop;
1805
1805
/*
 * F_RDADVISE: issue a read-advisory (prefetch hint) for an offset/count
 * range; validated for non-negative values then passed to the
 * filesystem via VNOP_IOCTL.
 */
1806 case F_RDADVISE: {
1807 struct radvisory ra_struct;
1808
1809 if (fp->f_type != DTYPE_VNODE) {
1810 error = EBADF;
1811 goto out;
1812 }
1813 vp = (struct vnode *)fp->f_data;
1814 proc_fdunlock(p);
1815
1816 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1817 goto outdrop;
1818 }
1819 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
1820 error = EINVAL;
1821 goto outdrop;
1822 }
1823 if ((error = vnode_getwithref(vp)) == 0) {
1824 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1825
1826 (void)vnode_put(vp);
1827 }
1828 goto outdrop;
1829 }
1830
/* F_FLUSH_DATA: asynchronous fsync of this vnode (MNT_NOWAIT). */
1831 case F_FLUSH_DATA:
1832
1833 if (fp->f_type != DTYPE_VNODE) {
1834 error = EBADF;
1835 goto out;
1836 }
1837 vp = (struct vnode *)fp->f_data;
1838 proc_fdunlock(p);
1839
1840 if ((error = vnode_getwithref(vp)) == 0) {
1841 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1842
1843 (void)vnode_put(vp);
1844 }
1845 goto outdrop;
1846
/*
 * F_LOG2PHYS / F_LOG2PHYS_EXT: translate a logical file offset into a
 * physical device offset via VNOP_OFFTOBLK/VNOP_BLKTOOFF/VNOP_BLOCKMAP.
 * The _EXT variant takes the offset (and contig-bytes budget) from the
 * user struct; the plain variant uses the descriptor's current offset.
 */
1847 case F_LOG2PHYS:
1848 case F_LOG2PHYS_EXT: {
1849 struct log2phys l2p_struct = {}; /* structure for allocate command */
1850 int devBlockSize;
1851
1852 off_t file_offset = 0;
1853 size_t a_size = 0;
1854 size_t run = 0;
1855
1856 if (uap->cmd == F_LOG2PHYS_EXT) {
1857 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1858 if (error) {
1859 goto out;
1860 }
1861 file_offset = l2p_struct.l2p_devoffset;
1862 } else {
1863 file_offset = fp->f_offset;
1864 }
1865 if (fp->f_type != DTYPE_VNODE) {
1866 error = EBADF;
1867 goto out;
1868 }
1869 vp = (struct vnode *)fp->f_data;
1870 proc_fdunlock(p);
1871 if ((error = vnode_getwithref(vp))) {
1872 goto outdrop;
1873 }
/* resolve the containing logical block, then its block-aligned offset */
1874 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1875 if (error) {
1876 (void)vnode_put(vp);
1877 goto outdrop;
1878 }
1879 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1880 if (error) {
1881 (void)vnode_put(vp);
1882 goto outdrop;
1883 }
1884 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1885 if (uap->cmd == F_LOG2PHYS_EXT) {
1886 if (l2p_struct.l2p_contigbytes < 0) {
1887 vnode_put(vp);
1888 error = EINVAL;
1889 goto outdrop;
1890 }
1891
1892 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1893 } else {
1894 a_size = devBlockSize;
1895 }
1896
1897 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1898
1899 (void)vnode_put(vp);
1900
1901 if (!error) {
1902 l2p_struct.l2p_flags = 0; /* for now */
1903 if (uap->cmd == F_LOG2PHYS_EXT) {
/* report the contiguous bytes remaining from the requested offset */
1904 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1905 } else {
1906 l2p_struct.l2p_contigbytes = 0; /* for now */
1907 }
1908
1909 /*
1910 * The block number being -1 suggests that the file offset is not backed
1911 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1912 */
1913 if (bn == -1) {
1914 /* Don't multiply it by the block size */
1915 l2p_struct.l2p_devoffset = bn;
1916 } else {
1917 l2p_struct.l2p_devoffset = bn * devBlockSize;
1918 l2p_struct.l2p_devoffset += file_offset - offset;
1919 }
1920 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1921 }
1922 goto outdrop;
1923 }
/*
 * F_GETPATH / F_GETPATH_NOFIRMLINK: resolve the vnode back to a path
 * (optionally ignoring firmlinks) into a temporary MAXPATHLEN buffer
 * and copy it out to the caller.
 */
1924 case F_GETPATH:
1925 case F_GETPATH_NOFIRMLINK: {
1926 char *pathbufp;
1927 int pathlen;
1928
1929 if (fp->f_type != DTYPE_VNODE) {
1930 error = EBADF;
1931 goto out;
1932 }
1933 vp = (struct vnode *)fp->f_data;
1934 proc_fdunlock(p);
1935
1936 pathlen = MAXPATHLEN;
1937 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1938 if (pathbufp == NULL) {
1939 error = ENOMEM;
1940 goto outdrop;
1941 }
1942 if ((error = vnode_getwithref(vp)) == 0) {
1943 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1944 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1945 } else {
1946 error = vn_getpath(vp, pathbufp, &pathlen);
1947 }
1948 (void)vnode_put(vp);
1949
1950 if (error == 0) {
1951 error = copyout((caddr_t)pathbufp, argp, pathlen);
1952 }
1953 }
1954 FREE(pathbufp, M_TEMP);
1955 goto outdrop;
1956 }
1957
/*
 * F_PATHPKG_CHECK: copy in a user path and ask the VFS whether this
 * vnode lies inside that package path; result goes to *retval.
 */
1958 case F_PATHPKG_CHECK: {
1959 char *pathbufp;
1960 size_t pathlen;
1961
1962 if (fp->f_type != DTYPE_VNODE) {
1963 error = EBADF;
1964 goto out;
1965 }
1966 vp = (struct vnode *)fp->f_data;
1967 proc_fdunlock(p);
1968
1969 pathlen = MAXPATHLEN;
1970 pathbufp = zalloc(ZV_NAMEI);
1971
1972 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1973 if ((error = vnode_getwithref(vp)) == 0) {
1974 AUDIT_ARG(text, pathbufp);
1975 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
1976
1977 (void)vnode_put(vp);
1978 }
1979 }
1980 zfree(ZV_NAMEI, pathbufp);
1981 goto outdrop;
1982 }
1983
/*
 * Sync/freeze family: all five commands simply forward to the
 * filesystem through VNOP_IOCTL with no argument payload.
 */
1984 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1985 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1986 case F_BARRIERFSYNC: // fsync + barrier
1987 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1988 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1989 if (fp->f_type != DTYPE_VNODE) {
1990 error = EBADF;
1991 goto out;
1992 }
1993 vp = (struct vnode *)fp->f_data;
1994 proc_fdunlock(p);
1995
1996 if ((error = vnode_getwithref(vp)) == 0) {
1997 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1998
1999 (void)vnode_put(vp);
2000 }
2001 break;
2002 }
2003
2003
2004 /*
2005 * SPI (private) for opening a file starting from a dir fd
2006 */
2007 case F_OPENFROM: {
2008 struct user_fopenfrom fopen;
2009 struct vnode_attr va;
2010 struct nameidata nd;
2011 int cmode;
2012
2013 /* Check if this isn't a valid file descriptor */
/* the dir fd must be a readable vnode-backed descriptor */
2014 if ((fp->f_type != DTYPE_VNODE) ||
2015 (fp->f_flag & FREAD) == 0) {
2016 error = EBADF;
2017 goto out;
2018 }
2019 vp = (struct vnode *)fp->f_data;
2020 proc_fdunlock(p);
2021
2022 if (vnode_getwithref(vp)) {
2023 error = ENOENT;
2024 goto outdrop;
2025 }
2026
2027 /* Only valid for directories */
2028 if (vp->v_type != VDIR) {
2029 vnode_put(vp);
2030 error = ENOTDIR;
2031 goto outdrop;
2032 }
2033
2034 /*
2035 * Only entitled apps may use the credentials of the thread
2036 * that opened the file descriptor.
2037 * Non-entitled threads will use their own context.
2038 */
2039 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2040 has_entitlement = 1;
2041 }
2042
2043 /* Get flags, mode and pathname arguments. */
2044 if (IS_64BIT_PROCESS(p)) {
2045 error = copyin(argp, &fopen, sizeof(fopen));
2046 } else {
/* 32-bit caller: widen the compat struct field by field */
2047 struct user32_fopenfrom fopen32;
2048
2049 error = copyin(argp, &fopen32, sizeof(fopen32));
2050 fopen.o_flags = fopen32.o_flags;
2051 fopen.o_mode = fopen32.o_mode;
2052 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2053 }
2054 if (error) {
2055 vnode_put(vp);
2056 goto outdrop;
2057 }
2058 AUDIT_ARG(fflags, fopen.o_flags);
2059 AUDIT_ARG(mode, fopen.o_mode);
2060 VATTR_INIT(&va);
2061 /* Mask off all but regular access permissions */
2062 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2063 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2064
2065 /* Start the lookup relative to the file descriptor's vnode. */
2066 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2067 fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2068 nd.ni_dvp = vp;
2069
2070 error = open1(has_entitlement ? &context : vfs_context_current(),
2071 &nd, fopen.o_flags, &va, fileproc_alloc_init, NULL, retval);
2072
2073 vnode_put(vp);
2074 break;
2075 }
2076 /*
2077 * SPI (private) for unlinking a file starting from a dir fd
2078 */
2079 case F_UNLINKFROM: {
2080 user_addr_t pathname;
2081
2082 /* Check if this isn't a valid file descriptor */
2083 if ((fp->f_type != DTYPE_VNODE) ||
2084 (fp->f_flag & FREAD) == 0) {
2085 error = EBADF;
2086 goto out;
2087 }
2088 vp = (struct vnode *)fp->f_data;
2089 proc_fdunlock(p);
2090
2091 if (vnode_getwithref(vp)) {
2092 error = ENOENT;
2093 goto outdrop;
2094 }
2095
2096 /* Only valid for directories */
2097 if (vp->v_type != VDIR) {
2098 vnode_put(vp);
2099 error = ENOTDIR;
2100 goto outdrop;
2101 }
2102
2103 /*
2104 * Only entitled apps may use the credentials of the thread
2105 * that opened the file descriptor.
2106 * Non-entitled threads will use their own context.
2107 */
2108 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2109 has_entitlement = 1;
2110 }
2111
2112 /* Get flags, mode and pathname arguments. */
2113 if (IS_64BIT_PROCESS(p)) {
2114 pathname = (user_addr_t)argp;
2115 } else {
2116 pathname = CAST_USER_ADDR_T(argp);
2117 }
2118
2119 /* Start the lookup relative to the file descriptor's vnode. */
2120 error = unlink1(has_entitlement ? &context : vfs_context_current(),
2121 vp, pathname, UIO_USERSPACE, 0);
2122
2123 vnode_put(vp);
2124 break;
2125 }
2126
2126
/*
 * Code-signature attachment.  F_ADDSIGS copies the blob from user
 * memory; the F_ADDFILESIGS variants read it from the file itself.
 * After registering via ubc_cs_blob_add, _RETURN/_FOR_DYLD_SIM/_INFO
 * report the blob's end offset (and _INFO the cdhash/hash type) back.
 */
2127 case F_ADDSIGS:
2128 case F_ADDFILESIGS:
2129 case F_ADDFILESIGS_FOR_DYLD_SIM:
2130 case F_ADDFILESIGS_RETURN:
2131 case F_ADDFILESIGS_INFO:
2132 {
2133 struct cs_blob *blob = NULL;
2134 struct user_fsignatures fs;
2135 kern_return_t kr;
2136 vm_offset_t kernel_blob_addr;
2137 vm_size_t kernel_blob_size;
2138 int blob_add_flags = 0;
/* only copy in the input portion of the struct; _INFO's input region
 * extends up to its first output field */
2139 const size_t sizeof_fs = (uap->cmd == F_ADDFILESIGS_INFO ?
2140 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
2141 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
2142
2143 if (fp->f_type != DTYPE_VNODE) {
2144 error = EBADF;
2145 goto out;
2146 }
2147 vp = (struct vnode *)fp->f_data;
2148 proc_fdunlock(p);
2149
2150 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
/* dyld_sim blobs force the process to be killed on invalid pages */
2151 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
2152 if ((p->p_csflags & CS_KILL) == 0) {
2153 proc_lock(p);
2154 p->p_csflags |= CS_KILL;
2155 proc_unlock(p);
2156 }
2157 }
2158
2159 error = vnode_getwithref(vp);
2160 if (error) {
2161 goto outdrop;
2162 }
2163
2164 if (IS_64BIT_PROCESS(p)) {
2165 error = copyin(argp, &fs, sizeof_fs);
2166 } else {
/* F_ADDFILESIGS_INFO has no 32-bit compat layout */
2167 if (uap->cmd == F_ADDFILESIGS_INFO) {
2168 error = EINVAL;
2169 vnode_put(vp);
2170 goto outdrop;
2171 }
2172
2173 struct user32_fsignatures fs32;
2174
2175 error = copyin(argp, &fs32, sizeof(fs32));
2176 fs.fs_file_start = fs32.fs_file_start;
2177 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
2178 fs.fs_blob_size = fs32.fs_blob_size;
2179 }
2180
2181 if (error) {
2182 vnode_put(vp);
2183 goto outdrop;
2184 }
2185
2186 /*
2187 * First check if we have something loaded a this offset
2188 */
2189 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
2190 if (blob != NULL) {
2191 /* If this is for dyld_sim revalidate the blob */
2192 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2193 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
2194 if (error) {
2195 blob = NULL;
/* EAGAIN means the stale blob was dropped: fall through and
 * load a fresh copy below */
2196 if (error != EAGAIN) {
2197 vnode_put(vp);
2198 goto outdrop;
2199 }
2200 }
2201 }
2202 }
2203
2204 if (blob == NULL) {
2205 /*
2206 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
2207 * our use cases for the immediate future, but note that at the time of this commit, some
2208 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
2209 *
2210 * We should consider how we can manage this more effectively; the above means that some
2211 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
2212 * threshold considered ridiculous at the time of this change.
2213 */
2214 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
2215 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2216 error = E2BIG;
2217 vnode_put(vp);
2218 goto outdrop;
2219 }
2220
2221 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2222 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2223 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2224 error = ENOMEM;
2225 vnode_put(vp);
2226 goto outdrop;
2227 }
2228
2229 if (uap->cmd == F_ADDSIGS) {
2230 error = copyin(fs.fs_blob_start,
2231 (void *) kernel_blob_addr,
2232 fs.fs_blob_size);
2233 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
2234 int resid;
2235
2236 error = vn_rdwr(UIO_READ,
2237 vp,
2238 (caddr_t) kernel_blob_addr,
2239 (int)kernel_blob_size,
2240 fs.fs_file_start + fs.fs_blob_start,
2241 UIO_SYSSPACE,
2242 0,
2243 kauth_cred_get(),
2244 &resid,
2245 p);
2246 if ((error == 0) && resid) {
2247 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2248 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2249 }
2250 }
2251
2252 if (error) {
2253 ubc_cs_blob_deallocate(kernel_blob_addr,
2254 kernel_blob_size);
2255 vnode_put(vp);
2256 goto outdrop;
2257 }
2258
2259 blob = NULL;
2260 error = ubc_cs_blob_add(vp,
2261 proc_platform(p),
2262 CPU_TYPE_ANY, /* not for a specific architecture */
2263 CPU_SUBTYPE_ANY,
2264 fs.fs_file_start,
2265 &kernel_blob_addr,
2266 kernel_blob_size,
2267 NULL,
2268 blob_add_flags,
2269 &blob);
2270
2271 /* ubc_blob_add() has consumed "kernel_blob_addr" if it is zeroed */
2272 if (error) {
2273 if (kernel_blob_addr) {
2274 ubc_cs_blob_deallocate(kernel_blob_addr,
2275 kernel_blob_size);
2276 }
2277 vnode_put(vp);
2278 goto outdrop;
2279 } else {
2280 #if CHECK_CS_VALIDATION_BITMAP
2281 ubc_cs_validation_bitmap_allocate( vp );
2282 #endif
2283 }
2284 }
2285
2286 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
2287 uap->cmd == F_ADDFILESIGS_INFO) {
2288 /*
2289 * The first element of the structure is a
2290 * off_t that happen to have the same size for
2291 * all archs. Lets overwrite that.
2292 */
2293 off_t end_offset = 0;
2294 if (blob) {
2295 end_offset = blob->csb_end_offset;
2296 }
2297 error = copyout(&end_offset, argp, sizeof(end_offset));
2298
2299 if (error) {
2300 vnode_put(vp);
2301 goto outdrop;
2302 }
2303 }
2304
2305 if (uap->cmd == F_ADDFILESIGS_INFO) {
2306 /* Return information. What we copy out depends on the size of the
2307 * passed in structure, to keep binary compatibility. */
2308
2309 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
2310 // enough room for fs_cdhash[20]+fs_hash_type
2311
2312 if (blob != NULL) {
2313 error = copyout(blob->csb_cdhash,
2314 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
2315 USER_FSIGNATURES_CDHASH_LEN);
2316 if (error) {
2317 vnode_put(vp);
2318 goto outdrop;
2319 }
2320 int hashtype = cs_hash_type(blob->csb_hashtype);
2321 error = copyout(&hashtype,
2322 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
2323 sizeof(int));
2324 if (error) {
2325 vnode_put(vp);
2326 goto outdrop;
2327 }
2328 }
2329 }
2330 }
2331
2332 (void) vnode_put(vp);
2333 break;
2334 }
2335 #if CONFIG_SUPPLEMENTAL_SIGNATURES
/*
 * F_ADDFILESUPPL: attach a supplemental code signature, read from this
 * descriptor's file, to the "original" file named by fs_orig_fd.
 * 64-bit callers only.  Holds iocounts on both vnodes plus a reference
 * on the original fileproc; dropboth unwinds all three on failure.
 */
2336 case F_ADDFILESUPPL:
2337 {
2338 struct vnode *ivp;
2339 struct cs_blob *blob = NULL;
2340 struct user_fsupplement fs;
2341 int orig_fd;
2342 struct fileproc* orig_fp = NULL;
2343 kern_return_t kr;
2344 vm_offset_t kernel_blob_addr;
2345 vm_size_t kernel_blob_size;
2346
2347 if (!IS_64BIT_PROCESS(p)) {
2348 error = EINVAL;
2349 goto out; // drop fp and unlock fds
2350 }
2351
2352 if (fp->f_type != DTYPE_VNODE) {
2353 error = EBADF;
2354 goto out;
2355 }
2356
2357 error = copyin(argp, &fs, sizeof(fs));
2358 if (error) {
2359 goto out;
2360 }
2361
2362 orig_fd = fs.fs_orig_fd;
2363 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
2364 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
2365 goto out;
2366 }
2367
2368 if (orig_fp->f_type != DTYPE_VNODE) {
2369 error = EBADF;
2370 fp_drop(p, orig_fd, orig_fp, 1);
2371 goto out;
2372 }
2373
2374 ivp = (struct vnode *)orig_fp->f_data;
2375
2376 vp = (struct vnode *)fp->f_data;
2377
2378 proc_fdunlock(p);
2379
2380 error = vnode_getwithref(ivp);
2381 if (error) {
2382 fp_drop(p, orig_fd, orig_fp, 0);
2383 goto outdrop; //drop fp
2384 }
2385
2386 error = vnode_getwithref(vp);
2387 if (error) {
2388 vnode_put(ivp);
2389 fp_drop(p, orig_fd, orig_fp, 0);
2390 goto outdrop;
2391 }
2392
/* same blob-size cap as the F_ADDSIGS family (CS_MAX_BLOB_SIZE) */
2393 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2394 error = E2BIG;
2395 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
2396 }
2397
2398 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2399 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2400 if (kr != KERN_SUCCESS) {
2401 error = ENOMEM;
2402 goto dropboth;
2403 }
2404
2405 int resid;
2406 error = vn_rdwr(UIO_READ, vp,
2407 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
2408 fs.fs_file_start + fs.fs_blob_start,
2409 UIO_SYSSPACE, 0,
2410 kauth_cred_get(), &resid, p);
2411 if ((error == 0) && resid) {
2412 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2413 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2414 }
2415
2416 if (error) {
2417 ubc_cs_blob_deallocate(kernel_blob_addr,
2418 kernel_blob_size);
2419 goto dropboth;
2420 }
2421
2422 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
2423 &kernel_blob_addr, kernel_blob_size, &blob);
2424
2425 /* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
2426 if (error) {
2427 if (kernel_blob_addr) {
2428 ubc_cs_blob_deallocate(kernel_blob_addr,
2429 kernel_blob_size);
2430 }
2431 goto dropboth;
2432 }
2433 vnode_put(ivp);
2434 vnode_put(vp);
2435 fp_drop(p, orig_fd, orig_fp, 0);
2436 break;
2437
2438 dropboth:
2439 vnode_put(ivp);
2440 vnode_put(vp);
2441 fp_drop(p, orig_fd, orig_fp, 0);
2442 goto outdrop;
2443 }
2444 #endif
/* F_GETCODEDIR / F_FINDSIGS: not implemented; always ENOTSUP. */
2445 case F_GETCODEDIR:
2446 case F_FINDSIGS: {
2447 error = ENOTSUP;
2448 goto out;
2449 }
/*
 * F_CHECK_LV: library-validation check on this open file, delegated to
 * the MACF policy (a no-op success when CONFIG_MACF is off).
 */
2450 case F_CHECK_LV: {
2451 struct fileglob *fg;
2452 fchecklv_t lv = {};
2453
2454 if (fp->f_type != DTYPE_VNODE) {
2455 error = EBADF;
2456 goto out;
2457 }
2458 fg = fp->fp_glob;
2459 proc_fdunlock(p);
2460
2461 if (IS_64BIT_PROCESS(p)) {
2462 error = copyin(argp, &lv, sizeof(lv));
2463 } else {
2464 struct user32_fchecklv lv32 = {};
2465
2466 error = copyin(argp, &lv32, sizeof(lv32));
2467 lv.lv_file_start = lv32.lv_file_start;
2468 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2469 lv.lv_error_message_size = lv32.lv_error_message_size;
2470 }
2471 if (error) {
2472 goto outdrop;
2473 }
2474
2475 #if CONFIG_MACF
2476 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2477 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2478 #endif
2479
2480 break;
2481 }
/*
 * F_GETSIGSINFO: query properties of the code-signature blob at the
 * given file offset; currently only GETSIGSINFO_PLATFORM_BINARY is
 * supported, copying out the platform-binary flag.
 */
2482 case F_GETSIGSINFO: {
2483 struct cs_blob *blob = NULL;
2484 fgetsigsinfo_t sigsinfo = {};
2485
2486 if (fp->f_type != DTYPE_VNODE) {
2487 error = EBADF;
2488 goto out;
2489 }
2490 vp = (struct vnode *)fp->f_data;
2491 proc_fdunlock(p);
2492
2493 error = vnode_getwithref(vp);
2494 if (error) {
2495 goto outdrop;
2496 }
2497
2498 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
2499 if (error) {
2500 vnode_put(vp);
2501 goto outdrop;
2502 }
2503
2504 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
2505 if (blob == NULL) {
2506 error = ENOENT;
2507 vnode_put(vp);
2508 goto outdrop;
2509 }
2510 switch (sigsinfo.fg_info_request) {
2511 case GETSIGSINFO_PLATFORM_BINARY:
2512 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
2513 error = copyout(&sigsinfo.fg_sig_is_platform,
2514 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
2515 sizeof(sigsinfo.fg_sig_is_platform));
2516 if (error) {
2517 vnode_put(vp);
2518 goto outdrop;
2519 }
2520 break;
2521 default:
2522 error = EINVAL;
2523 vnode_put(vp);
2524 goto outdrop;
2525 }
2526 vnode_put(vp);
2527 break;
2528 }
2529 #if CONFIG_PROTECT
2530 case F_GETPROTECTIONCLASS: {
2531 if (fp->f_type != DTYPE_VNODE) {
2532 error = EBADF;
2533 goto out;
2534 }
2535 vp = (struct vnode *)fp->f_data;
2536
2537 proc_fdunlock(p);
2538
2539 if (vnode_getwithref(vp)) {
2540 error = ENOENT;
2541 goto outdrop;
2542 }
2543
2544 struct vnode_attr va;
2545
2546 VATTR_INIT(&va);
2547 VATTR_WANTED(&va, va_dataprotect_class);
2548 error = VNOP_GETATTR(vp, &va, &context);
2549 if (!error) {
2550 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2551 *retval = va.va_dataprotect_class;
2552 } else {
2553 error = ENOTSUP;
2554 }
2555 }
2556
2557 vnode_put(vp);
2558 break;
2559 }
2560
2561 case F_SETPROTECTIONCLASS: {
2562 /* tmp must be a valid PROTECTION_CLASS_* */
2563 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2564
2565 if (fp->f_type != DTYPE_VNODE) {
2566 error = EBADF;
2567 goto out;
2568 }
2569 vp = (struct vnode *)fp->f_data;
2570
2571 proc_fdunlock(p);
2572
2573 if (vnode_getwithref(vp)) {
2574 error = ENOENT;
2575 goto outdrop;
2576 }
2577
2578 /* Only go forward if you have write access */
2579 vfs_context_t ctx = vfs_context_current();
2580 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2581 vnode_put(vp);
2582 error = EBADF;
2583 goto outdrop;
2584 }
2585
2586 struct vnode_attr va;
2587
2588 VATTR_INIT(&va);
2589 VATTR_SET(&va, va_dataprotect_class, tmp);
2590
2591 error = VNOP_SETATTR(vp, &va, ctx);
2592
2593 vnode_put(vp);
2594 break;
2595 }
2596
2597 case F_TRANSCODEKEY: {
2598 if (fp->f_type != DTYPE_VNODE) {
2599 error = EBADF;
2600 goto out;
2601 }
2602
2603 vp = (struct vnode *)fp->f_data;
2604 proc_fdunlock(p);
2605
2606 if (vnode_getwithref(vp)) {
2607 error = ENOENT;
2608 goto outdrop;
2609 }
2610
2611 cp_key_t k = {
2612 .len = CP_MAX_WRAPPEDKEYSIZE,
2613 };
2614
2615 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2616
2617 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2618
2619 vnode_put(vp);
2620
2621 if (error == 0) {
2622 error = copyout(k.key, argp, k.len);
2623 *retval = k.len;
2624 }
2625
2626 FREE(k.key, M_TEMP);
2627
2628 break;
2629 }
2630
2631 case F_GETPROTECTIONLEVEL: {
2632 if (fp->f_type != DTYPE_VNODE) {
2633 error = EBADF;
2634 goto out;
2635 }
2636
2637 vp = (struct vnode*) fp->f_data;
2638 proc_fdunlock(p);
2639
2640 if (vnode_getwithref(vp)) {
2641 error = ENOENT;
2642 goto outdrop;
2643 }
2644
2645 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2646
2647 vnode_put(vp);
2648 break;
2649 }
2650
2651 case F_GETDEFAULTPROTLEVEL: {
2652 if (fp->f_type != DTYPE_VNODE) {
2653 error = EBADF;
2654 goto out;
2655 }
2656
2657 vp = (struct vnode*) fp->f_data;
2658 proc_fdunlock(p);
2659
2660 if (vnode_getwithref(vp)) {
2661 error = ENOENT;
2662 goto outdrop;
2663 }
2664
2665 /*
2666 * if cp_get_major_vers fails, error will be set to proper errno
2667 * and cp_version will still be 0.
2668 */
2669
2670 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2671
2672 vnode_put(vp);
2673 break;
2674 }
2675
2676 #endif /* CONFIG_PROTECT */
2677
2678 case F_MOVEDATAEXTENTS: {
2679 struct fileproc *fp2 = NULL;
2680 struct vnode *src_vp = NULLVP;
2681 struct vnode *dst_vp = NULLVP;
	/* We need to grab the 2nd FD out of the arguments before moving on. */
2683 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2684
2685 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2686 if (error) {
2687 goto out;
2688 }
2689
2690 if (fp->f_type != DTYPE_VNODE) {
2691 error = EBADF;
2692 goto out;
2693 }
2694
2695 /*
2696 * For now, special case HFS+ and APFS only, since this
2697 * is SPI.
2698 */
2699 src_vp = (struct vnode *)fp->f_data;
2700 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2701 error = ENOTSUP;
2702 goto out;
2703 }
2704
2705 /*
2706 * Get the references before we start acquiring iocounts on the vnodes,
2707 * while we still hold the proc fd lock
2708 */
2709 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2710 error = EBADF;
2711 goto out;
2712 }
2713 if (fp2->f_type != DTYPE_VNODE) {
2714 fp_drop(p, fd2, fp2, 1);
2715 error = EBADF;
2716 goto out;
2717 }
2718 dst_vp = (struct vnode *)fp2->f_data;
2719 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2720 fp_drop(p, fd2, fp2, 1);
2721 error = ENOTSUP;
2722 goto out;
2723 }
2724
2725 #if CONFIG_MACF
2726 /* Re-do MAC checks against the new FD, pass in a fake argument */
2727 error = mac_file_check_fcntl(proc_ucred(p), fp2->fp_glob, uap->cmd, 0);
2728 if (error) {
2729 fp_drop(p, fd2, fp2, 1);
2730 goto out;
2731 }
2732 #endif
2733 /* Audit the 2nd FD */
2734 AUDIT_ARG(fd, fd2);
2735
2736 proc_fdunlock(p);
2737
2738 if (vnode_getwithref(src_vp)) {
2739 fp_drop(p, fd2, fp2, 0);
2740 error = ENOENT;
2741 goto outdrop;
2742 }
2743 if (vnode_getwithref(dst_vp)) {
2744 vnode_put(src_vp);
2745 fp_drop(p, fd2, fp2, 0);
2746 error = ENOENT;
2747 goto outdrop;
2748 }
2749
2750 /*
2751 * Basic asserts; validate they are not the same and that
2752 * both live on the same filesystem.
2753 */
2754 if (dst_vp == src_vp) {
2755 vnode_put(src_vp);
2756 vnode_put(dst_vp);
2757 fp_drop(p, fd2, fp2, 0);
2758 error = EINVAL;
2759 goto outdrop;
2760 }
2761
2762 if (dst_vp->v_mount != src_vp->v_mount) {
2763 vnode_put(src_vp);
2764 vnode_put(dst_vp);
2765 fp_drop(p, fd2, fp2, 0);
2766 error = EXDEV;
2767 goto outdrop;
2768 }
2769
2770 /* Now we have a legit pair of FDs. Go to work */
2771
2772 /* Now check for write access to the target files */
2773 if (vnode_authorize(src_vp, NULLVP,
2774 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2775 vnode_put(src_vp);
2776 vnode_put(dst_vp);
2777 fp_drop(p, fd2, fp2, 0);
2778 error = EBADF;
2779 goto outdrop;
2780 }
2781
2782 if (vnode_authorize(dst_vp, NULLVP,
2783 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2784 vnode_put(src_vp);
2785 vnode_put(dst_vp);
2786 fp_drop(p, fd2, fp2, 0);
2787 error = EBADF;
2788 goto outdrop;
2789 }
2790
2791 /* Verify that both vps point to files and not directories */
2792 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2793 error = EINVAL;
2794 vnode_put(src_vp);
2795 vnode_put(dst_vp);
2796 fp_drop(p, fd2, fp2, 0);
2797 goto outdrop;
2798 }
2799
2800 /*
2801 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2802 * We'll pass in our special bit indicating that the new behavior is expected
2803 */
2804
2805 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2806
2807 vnode_put(src_vp);
2808 vnode_put(dst_vp);
2809 fp_drop(p, fd2, fp2, 0);
2810 break;
2811 }
2812
2813 /*
2814 * SPI for making a file compressed.
2815 */
2816 case F_MAKECOMPRESSED: {
2817 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2818
2819 if (fp->f_type != DTYPE_VNODE) {
2820 error = EBADF;
2821 goto out;
2822 }
2823
2824 vp = (struct vnode*) fp->f_data;
2825 proc_fdunlock(p);
2826
2827 /* get the vnode */
2828 if (vnode_getwithref(vp)) {
2829 error = ENOENT;
2830 goto outdrop;
2831 }
2832
2833 /* Is it a file? */
2834 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2835 vnode_put(vp);
2836 error = EBADF;
2837 goto outdrop;
2838 }
2839
2840 /* invoke ioctl to pass off to FS */
2841 /* Only go forward if you have write access */
2842 vfs_context_t ctx = vfs_context_current();
2843 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2844 vnode_put(vp);
2845 error = EBADF;
2846 goto outdrop;
2847 }
2848
2849 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2850
2851 vnode_put(vp);
2852 break;
2853 }
2854
2855 /*
2856 * SPI (private) for indicating to a filesystem that subsequent writes to
	 * the open FD will be written to the Fastflow.
2858 */
2859 case F_SET_GREEDY_MODE:
2860 /* intentionally drop through to the same handler as F_SETSTATIC.
2861 * both fcntls should pass the argument and their selector into VNOP_IOCTL.
2862 */
2863
2864 /*
2865 * SPI (private) for indicating to a filesystem that subsequent writes to
2866 * the open FD will represent static content.
2867 */
2868 case F_SETSTATICCONTENT: {
2869 caddr_t ioctl_arg = NULL;
2870
2871 if (uap->arg) {
2872 ioctl_arg = (caddr_t) 1;
2873 }
2874
2875 if (fp->f_type != DTYPE_VNODE) {
2876 error = EBADF;
2877 goto out;
2878 }
2879 vp = (struct vnode *)fp->f_data;
2880 proc_fdunlock(p);
2881
2882 error = vnode_getwithref(vp);
2883 if (error) {
2884 error = ENOENT;
2885 goto outdrop;
2886 }
2887
2888 /* Only go forward if you have write access */
2889 vfs_context_t ctx = vfs_context_current();
2890 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2891 vnode_put(vp);
2892 error = EBADF;
2893 goto outdrop;
2894 }
2895
2896 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2897 (void)vnode_put(vp);
2898
2899 break;
2900 }
2901
2902 /*
2903 * SPI (private) for indicating to the lower level storage driver that the
2904 * subsequent writes should be of a particular IO type (burst, greedy, static),
2905 * or other flavors that may be necessary.
2906 */
2907 case F_SETIOTYPE: {
2908 caddr_t param_ptr;
2909 uint32_t param;
2910
2911 if (uap->arg) {
2912 /* extract 32 bits of flags from userland */
2913 param_ptr = (caddr_t) uap->arg;
2914 param = (uint32_t) param_ptr;
2915 } else {
2916 /* If no argument is specified, error out */
2917 error = EINVAL;
2918 goto out;
2919 }
2920
2921 /*
2922 * Validate the different types of flags that can be specified:
2923 * all of them are mutually exclusive for now.
2924 */
2925 switch (param) {
2926 case F_IOTYPE_ISOCHRONOUS:
2927 break;
2928
2929 default:
2930 error = EINVAL;
2931 goto out;
2932 }
2933
2934
2935 if (fp->f_type != DTYPE_VNODE) {
2936 error = EBADF;
2937 goto out;
2938 }
2939 vp = (struct vnode *)fp->f_data;
2940 proc_fdunlock(p);
2941
2942 error = vnode_getwithref(vp);
2943 if (error) {
2944 error = ENOENT;
2945 goto outdrop;
2946 }
2947
2948 /* Only go forward if you have write access */
2949 vfs_context_t ctx = vfs_context_current();
2950 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2951 vnode_put(vp);
2952 error = EBADF;
2953 goto outdrop;
2954 }
2955
2956 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2957 (void)vnode_put(vp);
2958
2959 break;
2960 }
2961
2962 /*
2963 * Set the vnode pointed to by 'fd'
2964 * and tag it as the (potentially future) backing store
2965 * for another filesystem
2966 */
2967 case F_SETBACKINGSTORE: {
2968 if (fp->f_type != DTYPE_VNODE) {
2969 error = EBADF;
2970 goto out;
2971 }
2972
2973 vp = (struct vnode *)fp->f_data;
2974
2975 if (vp->v_tag != VT_HFS) {
2976 error = EINVAL;
2977 goto out;
2978 }
2979 proc_fdunlock(p);
2980
2981 if (vnode_getwithref(vp)) {
2982 error = ENOENT;
2983 goto outdrop;
2984 }
2985
2986 /* only proceed if you have write access */
2987 vfs_context_t ctx = vfs_context_current();
2988 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2989 vnode_put(vp);
2990 error = EBADF;
2991 goto outdrop;
2992 }
2993
2994
2995 /* If arg != 0, set, otherwise unset */
2996 if (uap->arg) {
2997 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2998 } else {
2999 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
3000 }
3001
3002 vnode_put(vp);
3003 break;
3004 }
3005
3006 /*
3007 * like F_GETPATH, but special semantics for
3008 * the mobile time machine handler.
3009 */
3010 case F_GETPATH_MTMINFO: {
3011 char *pathbufp;
3012 int pathlen;
3013
3014 if (fp->f_type != DTYPE_VNODE) {
3015 error = EBADF;
3016 goto out;
3017 }
3018 vp = (struct vnode *)fp->f_data;
3019 proc_fdunlock(p);
3020
3021 pathlen = MAXPATHLEN;
3022 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
3023 if (pathbufp == NULL) {
3024 error = ENOMEM;
3025 goto outdrop;
3026 }
3027 if ((error = vnode_getwithref(vp)) == 0) {
3028 int backingstore = 0;
3029
3030 /* Check for error from vn_getpath before moving on */
3031 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
3032 if (vp->v_tag == VT_HFS) {
3033 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
3034 }
3035 (void)vnode_put(vp);
3036
3037 if (error == 0) {
3038 error = copyout((caddr_t)pathbufp, argp, pathlen);
3039 }
3040 if (error == 0) {
3041 /*
3042 * If the copyout was successful, now check to ensure
3043 * that this vnode is not a BACKINGSTORE vnode. mtmd
3044 * wants the path regardless.
3045 */
3046 if (backingstore) {
3047 error = EBUSY;
3048 }
3049 }
3050 } else {
3051 (void)vnode_put(vp);
3052 }
3053 }
3054 FREE(pathbufp, M_TEMP);
3055 goto outdrop;
3056 }
3057
3058 #if DEBUG || DEVELOPMENT
3059 case F_RECYCLE:
3060 if (fp->f_type != DTYPE_VNODE) {
3061 error = EBADF;
3062 goto out;
3063 }
3064 vp = (struct vnode *)fp->f_data;
3065 proc_fdunlock(p);
3066
3067 vnode_recycle(vp);
3068 break;
3069 #endif
3070
3071 default:
3072 /*
		 * This is an fcntl() that we do not recognize at this level;
3074 * if this is a vnode, we send it down into the VNOP_IOCTL
3075 * for this vnode; this can include special devices, and will
3076 * effectively overload fcntl() to send ioctl()'s.
3077 */
3078 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
3079 error = EINVAL;
3080 goto out;
3081 }
3082
3083 /* Catch any now-invalid fcntl() selectors */
3084 switch (uap->cmd) {
3085 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
3086 case (int)FSIOC_FIOSEEKHOLE:
3087 case (int)FSIOC_FIOSEEKDATA:
3088 case (int)FSIOC_CAS_BSDFLAGS:
3089 case HFS_GET_BOOT_INFO:
3090 case HFS_SET_BOOT_INFO:
3091 case FIOPINSWAP:
3092 case F_MARKDEPENDENCY:
3093 case TIOCREVOKE:
3094 case TIOCREVOKECLEAR:
3095 error = EINVAL;
3096 goto out;
3097 default:
3098 break;
3099 }
3100
3101 if (fp->f_type != DTYPE_VNODE) {
3102 error = EBADF;
3103 goto out;
3104 }
3105 vp = (struct vnode *)fp->f_data;
3106 proc_fdunlock(p);
3107
3108 if ((error = vnode_getwithref(vp)) == 0) {
3109 #define STK_PARAMS 128
3110 char stkbuf[STK_PARAMS] = {0};
3111 unsigned int size;
3112 caddr_t data, memp;
3113 /*
3114 * For this to work properly, we have to copy in the
3115 * ioctl() cmd argument if there is one; we must also
3116 * check that a command parameter, if present, does
3117 * not exceed the maximum command length dictated by
3118 * the number of bits we have available in the command
3119 * to represent a structure length. Finally, we have
3120 * to copy the results back out, if it is that type of
3121 * ioctl().
3122 */
3123 size = IOCPARM_LEN(uap->cmd);
3124 if (size > IOCPARM_MAX) {
3125 (void)vnode_put(vp);
3126 error = EINVAL;
3127 break;
3128 }
3129
3130 memp = NULL;
3131 if (size > sizeof(stkbuf)) {
3132 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
3133 if (memp == 0) {
3134 (void)vnode_put(vp);
3135 error = ENOMEM;
3136 goto outdrop;
3137 }
3138 data = memp;
3139 } else {
3140 data = &stkbuf[0];
3141 }
3142
3143 if (uap->cmd & IOC_IN) {
3144 if (size) {
3145 /* structure */
3146 error = copyin(argp, data, size);
3147 if (error) {
3148 (void)vnode_put(vp);
3149 if (memp) {
3150 kheap_free(KHEAP_TEMP, memp, size);
3151 }
3152 goto outdrop;
3153 }
3154
3155 /* Bzero the section beyond that which was needed */
3156 if (size <= sizeof(stkbuf)) {
3157 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
3158 }
3159 } else {
3160 /* int */
3161 if (is64bit) {
3162 *(user_addr_t *)data = argp;
3163 } else {
3164 *(uint32_t *)data = (uint32_t)argp;
3165 }
3166 };
3167 } else if ((uap->cmd & IOC_OUT) && size) {
3168 /*
3169 * Zero the buffer so the user always
3170 * gets back something deterministic.
3171 */
3172 bzero(data, size);
3173 } else if (uap->cmd & IOC_VOID) {
3174 if (is64bit) {
3175 *(user_addr_t *)data = argp;
3176 } else {
3177 *(uint32_t *)data = (uint32_t)argp;
3178 }
3179 }
3180
3181 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
3182
3183 (void)vnode_put(vp);
3184
3185 /* Copy any output data to user */
3186 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
3187 error = copyout(data, argp, size);
3188 }
3189 if (memp) {
3190 kheap_free(KHEAP_TEMP, memp, size);
3191 }
3192 }
3193 break;
3194 }
3195
3196 outdrop:
3197 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3198 fp_drop(p, fd, fp, 0);
3199 return error;
3200 out:
3201 fp_drop(p, fd, fp, 1);
3202 proc_fdunlock(p);
3203 return error;
3204 }
3205
3206
3207 /*
3208 * finishdup
3209 *
3210 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
3211 *
3212 * Parameters: p Process performing the dup
3213 * old The fd to dup
3214 * new The fd to dup it to
3215 * fd_flags Flags to augment the new fd
3216 * retval Pointer to the call return area
3217 *
3218 * Returns: 0 Success
3219 * EBADF
3220 * ENOMEM
3221 *
3222 * Implicit returns:
3223 * *retval (modified) The new descriptor
3224 *
3225 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
3226 * the caller
3227 *
3228 * Notes: This function may drop and reacquire this lock; it is unsafe
3229 * for a caller to assume that other state protected by the lock
3230 * has not been subsequently changed out from under it.
3231 */
int
finishdup(proc_t p,
    struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
{
	struct fileproc *nfp;
	struct fileproc *ofp;
#if CONFIG_MACF
	int error;
#endif

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * The source fd must refer to an installed fileproc that is not
	 * in flux (UF_RESERVED set means another thread is operating on
	 * the slot); otherwise release the reserved target slot and fail.
	 */
	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
		fdrelse(p, new);
		return EBADF;
	}

#if CONFIG_MACF
	/* Give MAC policies a chance to veto the dup before committing. */
	error = mac_file_check_dup(proc_ucred(p), ofp->fp_glob, new);
	if (error) {
		fdrelse(p, new);
		return error;
	}
#endif

	/*
	 * Drop the fd lock across the (potentially blocking) fileproc
	 * allocation.  The 'new' slot stays reserved while the lock is
	 * dropped, so no other thread can claim it; but other fd-table
	 * state may change out from under the caller (see header note).
	 */
	proc_fdunlock(p);

	nfp = fileproc_alloc_init(NULL);

	proc_fdlock(p);

	if (nfp == NULL) {
		fdrelse(p, new);
		return ENOMEM;
	}

	/* Both descriptors now share the same open-file object (fileglob). */
	fg_ref(p, ofp->fp_glob);
	nfp->fp_glob = ofp->fp_glob;

#if DIAGNOSTIC
	if (fdp->fd_ofiles[new] != 0) {
		panic("finishdup: overwriting fd_ofiles with new %d", new);
	}
	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
		panic("finishdup: unreserved fileflags with new %d", new);
	}
#endif

	if (new > fdp->fd_lastfile) {
		fdp->fd_lastfile = new;
	}
	*fdflags(p, new) |= fd_flags;
	/* Publish the new fileproc and clear the slot's reservation. */
	procfdtbl_releasefd(p, new, nfp);
	*retval = new;
	return 0;
}
3290
3291
3292 /*
3293 * sys_close
3294 *
3295 * Description: The implementation of the close(2) system call
3296 *
3297 * Parameters: p Process in whose per process file table
3298 * the close is to occur
3299 * uap->fd fd to be closed
3300 * retval <unused>
3301 *
3302 * Returns: 0 Success
3303 * fp_lookup:EBADF Bad file descriptor
3304 * fp_guard_exception:??? Guarded file descriptor
3305 * close_internal:EBADF
3306 * close_internal:??? Anything returnable by a per-fileops
3307 * close function
3308 */
3309 int
3310 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
3311 {
3312 __pthread_testcancel(1);
3313 return close_nocancel(p, uap->fd);
3314 }
3315
3316 int
3317 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
3318 {
3319 return close_nocancel(p, uap->fd);
3320 }
3321
/*
 * close_nocancel
 *
 * Common close path: looks up the fileproc under the proc fd lock,
 * refuses to close guarded descriptors, and otherwise hands off to
 * fp_close_and_unlock(), which consumes (drops) the fd lock.
 */
int
close_nocancel(proc_t p, int fd)
{
	struct fileproc *fp;

	AUDIT_SYSCLOSE(p, fd);

	proc_fdlock(p);
	/* Fails if fd is not open or the slot is in flux (UF_RESERVED). */
	if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
		proc_fdunlock(p);
		return EBADF;
	}

	/* Guarded fds deliver a guard exception instead of closing. */
	if (fp_isguarded(fp, GUARD_CLOSE)) {
		int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
		proc_fdunlock(p);
		return error;
	}

	/* fp_close_and_unlock() drops the fd lock on all paths. */
	return fp_close_and_unlock(p, fd, fp, 0);
}
3343
3344
/*
 * fp_close_and_unlock
 *
 * Close the fileproc 'fp' installed at 'fd' in process 'p'.  Must be
 * called with the proc fd lock held; the lock is dropped before return
 * on every path.  Returns the result of dropping the fileglob reference
 * (which invokes the per-fileops close when the last reference goes).
 */
int
fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct fileglob *fg = fp->fp_glob;

#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/*
	 * Keep most people from finding the filedesc while we are closing it.
	 *
	 * Callers are:
	 *
	 * - dup2() which always waits for UF_RESERVED to clear
	 *
	 * - close/guarded_close/... who will fail the fileproc lookup if
	 *   UF_RESERVED is set,
	 *
	 * - fdexec()/fdfree() who only run once all threads in the proc
	 *   are properly canceled, hence no fileproc in this proc should
	 *   be in flux.
	 *
	 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
	 *
	 * Callers of fp_get_noref_locked_with_iocount() can still find
	 * this entry so that they can drop their I/O reference despite
	 * not having remembered the fileproc pointer (namely select() and
	 * file_drop()).
	 */
	if (p->p_fd->fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
		panic("%s: called with fileproc in flux (%d/:%p)",
		    __func__, fd, fp);
	}
	p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);

	/*
	 * Notifications (kauth listeners, MAC) and async-IO cancellation
	 * may block, so do them with the fd lock dropped; the slot is
	 * protected by the flags set above.
	 */
	if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
		proc_fdunlock(p);

		if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
			/*
			 * call out to allow 3rd party notification of close.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			if (vnode_getwithref((vnode_t)fg->fg_data) == 0) {
				u_int fileop_flags = 0;
				if (fg->fg_flag & FWASWRITTEN) {
					fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
				}
				kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
				    (uintptr_t)fg->fg_data, (uintptr_t)fileop_flags);
#if CONFIG_MACF
				mac_file_notify_close(proc_ucred(p), fp->fp_glob);
#endif
				vnode_put((vnode_t)fg->fg_data);
			}
		}
		if (fp->fp_flags & FP_AIOISSUED) {
			/*
			 * cancel all async IO requests that can be cancelled.
			 */
			_aio_close( p, fd );
		}

		proc_fdlock(p);
	}

	/* Detach any knotes attached to this descriptor. */
	if (fd < fdp->fd_knlistsize) {
		knote_fdclose(p, fd);
	}

	fileproc_drain(p, fp);

	/*
	 * FD_DUP2RESV: dup2() is about to reuse this slot, so keep it
	 * reserved (just clear the closing/close-on-* bits).  Otherwise
	 * release the fd number entirely.
	 */
	if (flags & FD_DUP2RESV) {
		fdp->fd_ofiles[fd] = NULL;
		fdp->fd_ofileflags[fd] &= ~(UF_CLOSING | UF_EXCLOSE | UF_FORKCLOSE);
	} else {
		fdrelse(p, fd);
	}

	proc_fdunlock(p);

	if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
		KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
		    fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg->fg_data));
	}

	fileproc_free(fp);

	/* May invoke the underlying object's close if this was the last ref. */
	return fg_drop(p, fg);
}
3437
3438
3439 /*
3440 * fstat
3441 *
3442 * Description: Return status information about a file descriptor.
3443 *
3444 * Parameters: p The process doing the fstat
3445 * fd The fd to stat
3446 * ub The user stat buffer
3447 * xsecurity The user extended security
3448 * buffer, or 0 if none
3449 * xsecurity_size The size of xsecurity, or 0
3450 * if no xsecurity
3451 * isstat64 Flag to indicate 64 bit version
3452 * for inode size, etc.
3453 *
3454 * Returns: 0 Success
3455 * EBADF
3456 * EFAULT
3457 * fp_lookup:EBADF Bad file descriptor
3458 * vnode_getwithref:???
3459 * copyout:EFAULT
3460 * vnode_getwithref:???
3461 * vn_stat:???
3462 * soo_stat:???
3463 * pipe_stat:???
3464 * pshm_stat:???
3465 * kqueue_stat:???
3466 *
3467 * Notes: Internal implementation for all other fstat() related
3468 * functions
3469 *
3470 * XXX switch on node type is bogus; need a stat in struct
3471 * XXX fileops instead.
3472 */
static int
fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
{
	struct fileproc *fp;
	/* Kernel-native stat result; which member is live depends on isstat64. */
	union {
		struct stat sb;
		struct stat64 sb64;
	} source;
	/* Userland-layout copy; selected by (isstat64, process ABI). */
	union {
		struct user64_stat user64_sb;
		struct user32_stat user32_sb;
		struct user64_stat64 user64_sb64;
		struct user32_stat64 user32_sb64;
	} dest;
	int error, my_size;
	file_type_t type;
	caddr_t data;
	kauth_filesec_t fsec;
	user_size_t xsecurity_bufsize;
	vfs_context_t ctx = vfs_context_current();
	void * sbptr;


	AUDIT_ARG(fd, fd);

	if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
		return error;
	}
	type = fp->f_type;
	data = fp->f_data;
	fsec = KAUTH_FILESEC_NONE;

	sbptr = (void *)&source;

	/* Dispatch on descriptor type; each fills *sbptr or sets error. */
	switch (type) {
	case DTYPE_VNODE:
		if ((error = vnode_getwithref((vnode_t)data)) == 0) {
			/*
			 * If the caller has the file open, and is not
			 * requesting extended security information, we are
			 * going to let them get the basic stat information.
			 */
			if (xsecurity == USER_ADDR_NULL) {
				error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
				    fp->fp_glob->fg_cred);
			} else {
				/* vn_stat() also fetches the filesec into fsec. */
				error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
			}

			AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
			(void)vnode_put((vnode_t)data);
		}
		break;

#if SOCKETS
	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)data, sbptr, isstat64);
		break;
#endif /* SOCKETS */

	case DTYPE_PIPE:
		error = pipe_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_PSXSHM:
		error = pshm_stat((void *)data, sbptr, isstat64);
		break;

	case DTYPE_KQUEUE:
		error = kqueue_stat((void *)data, sbptr, isstat64, p);
		break;

	default:
		error = EBADF;
		goto out;
	}
	if (error == 0) {
		caddr_t sbp;

		/*
		 * Zero the spare fields, then munge the kernel stat into
		 * the layout matching the caller's ABI before copyout.
		 */
		if (isstat64 != 0) {
			source.sb64.st_lspare = 0;
			source.sb64.st_qspare[0] = 0LL;
			source.sb64.st_qspare[1] = 0LL;

			if (IS_64BIT_PROCESS(current_proc())) {
				munge_user64_stat64(&source.sb64, &dest.user64_sb64);
				my_size = sizeof(dest.user64_sb64);
				sbp = (caddr_t)&dest.user64_sb64;
			} else {
				munge_user32_stat64(&source.sb64, &dest.user32_sb64);
				my_size = sizeof(dest.user32_sb64);
				sbp = (caddr_t)&dest.user32_sb64;
			}
		} else {
			source.sb.st_lspare = 0;
			source.sb.st_qspare[0] = 0LL;
			source.sb.st_qspare[1] = 0LL;
			if (IS_64BIT_PROCESS(current_proc())) {
				munge_user64_stat(&source.sb, &dest.user64_sb);
				my_size = sizeof(dest.user64_sb);
				sbp = (caddr_t)&dest.user64_sb;
			} else {
				munge_user32_stat(&source.sb, &dest.user32_sb);
				my_size = sizeof(dest.user32_sb);
				sbp = (caddr_t)&dest.user32_sb;
			}
		}

		error = copyout(sbp, ub, my_size);
	}

	/* caller wants extended security information? */
	if (xsecurity != USER_ADDR_NULL) {
		/* did we get any? */
		if (fsec == KAUTH_FILESEC_NONE) {
			/* report a zero-sized filesec to the caller */
			if (susize(xsecurity_size, 0) != 0) {
				error = EFAULT;
				goto out;
			}
		} else {
			/* find the user buffer size */
			xsecurity_bufsize = fusize(xsecurity_size);

			/* copy out the actual data size */
			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
				error = EFAULT;
				goto out;
			}

			/* if the caller supplied enough room, copy out to it */
			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
			}
		}
	}
out:
	fp_drop(p, fd, fp, 0);
	if (fsec != NULL) {
		kauth_filesec_free(fsec);
	}
	return error;
}
3615
3616
3617 /*
3618 * sys_fstat_extended
3619 *
3620 * Description: Extended version of fstat supporting returning extended
3621 * security information
3622 *
3623 * Parameters: p The process doing the fstat
3624 * uap->fd The fd to stat
3625 * uap->ub The user stat buffer
3626 * uap->xsecurity The user extended security
3627 * buffer, or 0 if none
3628 * uap->xsecurity_size The size of xsecurity, or 0
3629 *
3630 * Returns: 0 Success
3631 * !0 Errno (see fstat)
3632 */
3633 int
3634 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3635 {
3636 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3637 }
3638
3639
3640 /*
3641 * sys_fstat
3642 *
3643 * Description: Get file status for the file associated with fd
3644 *
3645 * Parameters: p The process doing the fstat
3646 * uap->fd The fd to stat
3647 * uap->ub The user stat buffer
3648 *
3649 * Returns: 0 Success
3650 * !0 Errno (see fstat)
3651 */
3652 int
3653 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3654 {
3655 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
3656 }
3657
3658
3659 /*
3660 * sys_fstat64_extended
3661 *
3662 * Description: Extended version of fstat64 supporting returning extended
3663 * security information
3664 *
3665 * Parameters: p The process doing the fstat
3666 * uap->fd The fd to stat
3667 * uap->ub The user stat buffer
3668 * uap->xsecurity The user extended security
3669 * buffer, or 0 if none
3670 * uap->xsecurity_size The size of xsecurity, or 0
3671 *
3672 * Returns: 0 Success
3673 * !0 Errno (see fstat)
3674 */
3675 int
3676 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3677 {
3678 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3679 }
3680
3681
3682 /*
3683 * sys_fstat64
3684 *
3685 * Description: Get 64 bit version of the file status for the file associated
3686 * with fd
3687 *
3688 * Parameters: p The process doing the fstat
3689 * uap->fd The fd to stat
3690 * uap->ub The user stat buffer
3691 *
3692 * Returns: 0 Success
3693 * !0 Errno (see fstat)
3694 */
3695 int
3696 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3697 {
3698 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
3699 }
3700
3701
3702 /*
3703 * sys_fpathconf
3704 *
3705 * Description: Return pathconf information about a file descriptor.
3706 *
3707 * Parameters: p Process making the request
3708 * uap->fd fd to get information about
3709 * uap->name Name of information desired
3710 * retval Pointer to the call return area
3711 *
3712 * Returns: 0 Success
3713 * EINVAL
3714 * fp_lookup:EBADF Bad file descriptor
3715 * vnode_getwithref:???
3716 * vn_pathconf:???
3717 *
3718 * Implicit returns:
3719 * *retval (modified) Returned information (numeric)
3720 */
3721 int
3722 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3723 {
3724 int fd = uap->fd;
3725 struct fileproc *fp;
3726 struct vnode *vp;
3727 int error = 0;
3728 file_type_t type;
3729 caddr_t data;
3730
3731
3732 AUDIT_ARG(fd, uap->fd);
3733 if ((error = fp_lookup(p, fd, &fp, 0))) {
3734 return error;
3735 }
3736 type = fp->f_type;
3737 data = fp->f_data;
3738
3739 switch (type) {
3740 case DTYPE_SOCKET:
3741 if (uap->name != _PC_PIPE_BUF) {
3742 error = EINVAL;
3743 goto out;
3744 }
3745 *retval = PIPE_BUF;
3746 error = 0;
3747 goto out;
3748
3749 case DTYPE_PIPE:
3750 if (uap->name != _PC_PIPE_BUF) {
3751 error = EINVAL;
3752 goto out;
3753 }
3754 *retval = PIPE_BUF;
3755 error = 0;
3756 goto out;
3757
3758 case DTYPE_VNODE:
3759 vp = (struct vnode *)data;
3760
3761 if ((error = vnode_getwithref(vp)) == 0) {
3762 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3763
3764 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3765
3766 (void)vnode_put(vp);
3767 }
3768 goto out;
3769
3770 default:
3771 error = EINVAL;
3772 goto out;
3773 }
3774 /*NOTREACHED*/
3775 out:
3776 fp_drop(p, fd, fp, 0);
3777 return error;
3778 }
3779
/*
 * Statistics counter for the number of times a process calling fdalloc()
 * has resulted in an expansion of the per process open file table.
 *
 * XXX This would likely be of more use if it were per process
 */
int fdexpand;
3787
3788
3789 /*
3790 * fdalloc
3791 *
3792 * Description: Allocate a file descriptor for the process.
3793 *
3794 * Parameters: p Process to allocate the fd in
3795 * want The fd we would prefer to get
3796 * result Pointer to fd we got
3797 *
3798 * Returns: 0 Success
3799 * EMFILE
3800 * ENOMEM
3801 *
3802 * Implicit returns:
3803 * *result (modified) The fd which was allocated
3804 */
int
fdalloc(proc_t p, int want, int *result)
{
	struct filedesc *fdp = p->p_fd;
	int i;
	int last, numfiles, oldnfiles;
	struct fileproc **newofiles, **ofiles;
	char *newofileflags;
	rlim_t lim;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	/* Clamp so the subsequent int casts cannot overflow. */
	nofile = MIN(nofile, INT_MAX);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
#if DIAGNOSTIC
	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
#endif

	/* Effective limit: lesser of RLIMIT_NOFILE and the global sysctl. */
	lim = MIN(nofile, maxfilesperproc);
	for (;;) {
		last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
		if ((i = want) < fdp->fd_freefile) {
			i = fdp->fd_freefile;
		}
		/* Scan for a slot that is both empty and not reserved. */
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
				/* Mark the slot UF_RESERVED so no-one else grabs it. */
				procfdtbl_reservefd(p, i);
				if (i > fdp->fd_lastfile) {
					fdp->fd_lastfile = i;
				}
				if (want <= fdp->fd_freefile) {
					fdp->fd_freefile = i;
				}
				*result = i;
				return 0;
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if ((rlim_t)fdp->fd_nfiles >= lim) {
			return EMFILE;
		}
		if (fdp->fd_nfiles < NDEXTENT) {
			numfiles = NDEXTENT;
		} else {
			numfiles = 2 * fdp->fd_nfiles;
		}
		/* Enforce lim */
		if ((rlim_t)numfiles > lim) {
			numfiles = (int)lim;
		}
		/*
		 * Drop the fdlock across the blocking allocation; the table
		 * may be grown by someone else in the meantime, hence the
		 * re-check below after the lock is retaken.
		 */
		proc_fdunlock(p);
		MALLOC(newofiles, struct fileproc **,
		    numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
		proc_fdlock(p);
		if (newofiles == NULL) {
			return ENOMEM;
		}
		if (fdp->fd_nfiles >= numfiles) {
			/* Lost the race: someone grew the table; retry the scan. */
			FREE(newofiles, M_OFILETABL);
			continue;
		}
		/* One allocation holds both arrays: flags follow the pointers. */
		newofileflags = (char *) &newofiles[numfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		oldnfiles = fdp->fd_nfiles;
		(void) memcpy(newofiles, fdp->fd_ofiles,
		    oldnfiles * sizeof(*fdp->fd_ofiles));
		(void) memset(&newofiles[oldnfiles], 0,
		    (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));

		(void) memcpy(newofileflags, fdp->fd_ofileflags,
		    oldnfiles * sizeof(*fdp->fd_ofileflags));
		(void) memset(&newofileflags[oldnfiles], 0,
		    (numfiles - oldnfiles) *
		    sizeof(*fdp->fd_ofileflags));
		ofiles = fdp->fd_ofiles;
		fdp->fd_ofiles = newofiles;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = numfiles;
		FREE(ofiles, M_OFILETABL);
		fdexpand++;
	}
}
3897
3898
3899 /*
3900 * fdavail
3901 *
3902 * Description: Check to see whether n user file descriptors are available
3903 * to the process p.
3904 *
3905 * Parameters: p Process to check in
3906 * n The number of fd's desired
3907 *
3908 * Returns: 0 No
3909 * 1 Yes
3910 *
3911 * Locks: Assumes proc_fdlock for process is held by the caller
3912 *
3913 * Notes: The answer only remains valid so long as the proc_fdlock is
3914 * held by the caller.
3915 */
int
fdavail(proc_t p, int n)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc **fpp;
	char *flags;
	int i;
	int lim;
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	/* Effective limit: lesser of RLIMIT_NOFILE and the global sysctl. */
	lim = (int)MIN(nofile, maxfilesperproc);
	/*
	 * Headroom the table could still grow into counts as available;
	 * if that alone covers n, we are done (note n is decremented).
	 */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
		return 1;
	}
	/* Otherwise scan the existing table for free, unreserved slots. */
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
			return 1;
		}
	}
	return 0;
}
3939
3940
3941 struct fileproc *
3942 fp_get_noref_locked(proc_t p, int fd)
3943 {
3944 struct filedesc *fdp = p->p_fd;
3945 struct fileproc *fp;
3946
3947 if (fd < 0 || fd >= fdp->fd_nfiles ||
3948 (fp = fdp->fd_ofiles[fd]) == NULL ||
3949 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3950 return NULL;
3951 }
3952 return fp;
3953 }
3954
3955 struct fileproc *
3956 fp_get_noref_locked_with_iocount(proc_t p, int fd)
3957 {
3958 struct filedesc *fdp = p->p_fd;
3959 struct fileproc *fp = NULL;
3960
3961 if (fd < 0 || fd >= fdp->fd_nfiles ||
3962 (fp = fdp->fd_ofiles[fd]) == NULL ||
3963 os_ref_get_count(&fp->fp_iocount) <= 1 ||
3964 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3965 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
3966 panic("%s: caller without an ioccount on fileproc (%d/:%p)",
3967 __func__, fd, fp);
3968 }
3969
3970 return fp;
3971 }
3972
3973 int
3974 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
3975 {
3976 struct filedesc *fdp = p->p_fd;
3977 struct fileproc *fp;
3978
3979 proc_fdlock_spin(p);
3980 if (fd < 0 || fd >= fdp->fd_nfiles ||
3981 (fp = fdp->fd_ofiles[fd]) == NULL ||
3982 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3983 proc_fdunlock(p);
3984 return EBADF;
3985 }
3986
3987 if (fp->f_type != ftype) {
3988 proc_fdunlock(p);
3989 return err;
3990 }
3991
3992 os_ref_retain_locked(&fp->fp_iocount);
3993 proc_fdunlock(p);
3994
3995 *fpp = fp;
3996 return 0;
3997 }
3998
3999
4000 /*
4001 * fp_getfvp
4002 *
4003 * Description: Get fileproc and vnode pointer for a given fd from the per
4004 * process open file table of the specified process, and if
4005 * successful, increment the fp_iocount
4006 *
4007 * Parameters: p Process in which fd lives
4008 * fd fd to get information for
4009 * resultfp Pointer to result fileproc
4010 * pointer area, or 0 if none
4011 * resultvp Pointer to result vnode pointer
4012 * area, or 0 if none
4013 *
4014 * Returns: 0 Success
4015 * EBADF Bad file descriptor
4016 * ENOTSUP fd does not refer to a vnode
4017 *
4018 * Implicit returns:
4019 * *resultfp (modified) Fileproc pointer
4020 * *resultvp (modified) vnode pointer
4021 *
4022 * Notes: The resultfp and resultvp fields are optional, and may be
4023 * independently specified as NULL to skip returning information
4024 *
4025 * Locks: Internally takes and releases proc_fdlock
4026 */
4027 int
4028 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
4029 {
4030 struct fileproc *fp;
4031 int error;
4032
4033 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
4034 if (error == 0) {
4035 if (resultfp) {
4036 *resultfp = fp;
4037 }
4038 if (resultvp) {
4039 *resultvp = (struct vnode *)fp->f_data;
4040 }
4041 }
4042
4043 return error;
4044 }
4045
4046
4047 /*
4048 * fp_get_pipe_id
4049 *
4050 * Description: Get pipe id for a given fd from the per process open file table
4051 * of the specified process.
4052 *
4053 * Parameters: p Process in which fd lives
4054 * fd fd to get information for
4055 * result_pipe_id Pointer to result pipe id
4056 *
4057 * Returns: 0 Success
4058 * EINVAL NULL pointer arguments passed
4059 * fp_lookup:EBADF Bad file descriptor
4060 * ENOTSUP fd does not refer to a pipe
4061 *
4062 * Implicit returns:
4063 * *result_pipe_id (modified) pipe id
4064 *
4065 * Locks: Internally takes and releases proc_fdlock
4066 */
4067 int
4068 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
4069 {
4070 struct fileproc *fp = FILEPROC_NULL;
4071 struct fileglob *fg = NULL;
4072 int error = 0;
4073
4074 if (p == NULL || result_pipe_id == NULL) {
4075 return EINVAL;
4076 }
4077
4078 proc_fdlock(p);
4079 if ((error = fp_lookup(p, fd, &fp, 1))) {
4080 proc_fdunlock(p);
4081 return error;
4082 }
4083 fg = fp->fp_glob;
4084
4085 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
4086 *result_pipe_id = pipe_id((struct pipe*)fg->fg_data);
4087 } else {
4088 error = ENOTSUP;
4089 }
4090
4091 fp_drop(p, fd, fp, 1);
4092 proc_fdunlock(p);
4093 return error;
4094 }
4095
4096
4097 /*
4098 * fp_lookup
4099 *
4100 * Description: Get fileproc pointer for a given fd from the per process
4101 * open file table of the specified process and if successful,
4102 * increment the fp_iocount
4103 *
4104 * Parameters: p Process in which fd lives
4105 * fd fd to get information for
4106 * resultfp Pointer to result fileproc
4107 * pointer area, or 0 if none
4108 * locked !0 if the caller holds the
4109 * proc_fdlock, 0 otherwise
4110 *
4111 * Returns: 0 Success
4112 * EBADF Bad file descriptor
4113 *
4114 * Implicit returns:
4115 * *resultfp (modified) Fileproc pointer
4116 *
4117 * Locks: If the argument 'locked' is non-zero, then the caller is
4118 * expected to have taken and held the proc_fdlock; if it is
4119 * zero, than this routine internally takes and drops this lock.
4120 */
int
fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
{
	struct filedesc *fdp = p->p_fd;
	struct fileproc *fp;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * Reject out-of-range fds, empty slots, and slots that are only
	 * reserved (allocation in progress, no fileproc installed yet).
	 */
	if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}
	/* Take the I/O reference the caller must later release via fp_drop(). */
	os_ref_retain_locked(&fp->fp_iocount);

	if (resultfp) {
		*resultfp = fp;
	}
	if (!locked) {
		proc_fdunlock(p);
	}

	return 0;
}
4149
4150
4151 /*
4152 * fp_tryswap
4153 *
4154 * Description: Swap the fileproc pointer for a given fd with a new
4155 * fileproc pointer in the per-process open file table of
4156 * the specified process. The fdlock must be held at entry.
4157 * Iff the swap is successful, the old fileproc pointer is freed.
4158 *
4159 * Parameters: p Process containing the fd
4160 * fd The fd of interest
4161 * nfp Pointer to the newfp
4162 *
4163 * Returns: 0 Success
4164 * EBADF Bad file descriptor
4165 * EINTR Interrupted
4166 * EKEEPLOOKING Other references were active, try again.
4167 */
int
fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
{
	struct fileproc *fp;
	int error;

	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);

	/* Take our own I/O reference on the fileproc being replaced. */
	if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
		return error;
	}
	/*
	 * At this point, our caller (change_guardedfd_np) has
	 * one fp_iocount reference, and we just took another
	 * one to begin the replacement.
	 * fp and nfp have a +1 reference from allocation.
	 * Thus if no-one else is looking, fp_iocount should be 3.
	 */
	if (os_ref_get_count(&fp->fp_iocount) < 3 ||
	    1 != os_ref_get_count(&nfp->fp_iocount)) {
		panic("%s: fp_iocount", __func__);
	} else if (3 == os_ref_get_count(&fp->fp_iocount)) {
		/* Copy the contents of *fp, preserving the "type" of *nfp */

		nfp->fp_flags = (nfp->fp_flags & FP_TYPEMASK) |
		    (fp->fp_flags & ~FP_TYPEMASK);
		/*
		 * Mirror the two outstanding references (the caller's and
		 * ours) onto nfp before it is installed in the table.
		 */
		os_ref_retain_locked(&nfp->fp_iocount);
		os_ref_retain_locked(&nfp->fp_iocount);
		nfp->fp_glob = fp->fp_glob;
		nfp->fp_wset = fp->fp_wset;

		/* Install the replacement, then drop our reference on it. */
		p->p_fd->fd_ofiles[fd] = nfp;
		fp_drop(p, fd, nfp, 1);

		/* Release the corresponding references on the old fileproc. */
		os_ref_release_live(&fp->fp_iocount);
		os_ref_release_live(&fp->fp_iocount);
		fileproc_free(fp);
	} else {
		/*
		 * Wait for all other active references to evaporate.
		 */
		p->p_fpdrainwait = 1;
		error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
		    PRIBIO | PCATCH, "tryswap fpdrain", NULL);
		if (0 == error) {
			/*
			 * Return an "internal" errno to trigger a full
			 * reevaluation of the change-guard attempt.
			 */
			error = EKEEPLOOKING;
		}
		(void) fp_drop(p, fd, fp, 1);
	}
	return error;
}
4223
4224
4225 /*
4226 * fp_drop
4227 *
4228 * Description: Drop the I/O reference previously taken by calling fp_lookup
4229 * et. al.
4230 *
4231 * Parameters: p Process in which the fd lives
4232 * fd fd associated with the fileproc
4233 * fp fileproc on which to set the
4234 * flag and drop the reference
4235 * locked flag to internally take and
4236 * drop proc_fdlock if it is not
4237 * already held by the caller
4238 *
4239 * Returns: 0 Success
4240 * EBADF Bad file descriptor
4241 *
4242 * Locks: This function internally takes and drops the proc_fdlock for
4243 * the supplied process if 'locked' is zero, and assumes that
4244 * the caller already holds this lock if 'locked' is non-zero.
4245 *
4246 * Notes: The fileproc must correspond to the fd in the supplied proc
4247 */
int
fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
{
	struct filedesc *fdp = p->p_fd;
	int needwakeup = 0;

	if (!locked) {
		proc_fdlock_spin(p);
	}
	/*
	 * If no fileproc was supplied, look one up from the fd; a reserved
	 * slot is still accepted when the file is in the middle of closing
	 * (UF_CLOSING), since the closer needs to drop references too.
	 */
	if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
	    !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
		if (!locked) {
			proc_fdunlock(p);
		}
		return EBADF;
	}

	/* Dropping the last I/O reference: clear state and note any waiter. */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (p->p_fpdrainwait) {
			p->p_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	if (!locked) {
		proc_fdunlock(p);
	}
	/* The wakeup of any drain waiter happens after the lock is dropped. */
	if (needwakeup) {
		wakeup(&p->p_fpdrainwait);
	}

	return 0;
}
4286
4287
4288 /*
4289 * file_vnode
4290 *
4291 * Description: Given an fd, look it up in the current process's per process
4292 * open file table, and return its internal vnode pointer.
4293 *
4294 * Parameters: fd fd to obtain vnode from
4295 * vpp pointer to vnode return area
4296 *
4297 * Returns: 0 Success
4298 * EINVAL The fd does not refer to a
4299 * vnode fileproc entry
4300 * fp_lookup:EBADF Bad file descriptor
4301 *
4302 * Implicit returns:
4303 * *vpp (modified) Returned vnode pointer
4304 *
4305 * Locks: This function internally takes and drops the proc_fdlock for
4306 * the current process
4307 *
4308 * Notes: If successful, this function increments the fp_iocount on the
4309 * fd's corresponding fileproc.
4310 *
4311 * The fileproc referenced is not returned; because of this, care
4312 * must be taken to not drop the last reference (e.g. by closing
4313 * the file). This is inherently unsafe, since the reference may
4314 * not be recoverable from the vnode, if there is a subsequent
4315 * close that destroys the associate fileproc. The caller should
4316 * therefore retain their own reference on the fileproc so that
4317 * the fp_iocount can be dropped subsequently. Failure to do this
4318 * can result in the returned pointer immediately becoming invalid
4319 * following the call.
4320 *
4321 * Use of this function is discouraged.
4322 */
int
file_vnode(int fd, struct vnode **vpp)
{
	/* Equivalent to file_vnode_withvid() with the vid output suppressed. */
	return file_vnode_withvid(fd, vpp, NULL);
}
4328
4329 /*
4330 * file_vnode_withvid
4331 *
4332 * Description: Given an fd, look it up in the current process's per process
4333 * open file table, and return its internal vnode pointer.
4334 *
4335 * Parameters: fd fd to obtain vnode from
4336 * vpp pointer to vnode return area
4337 * vidp pointer to vid of the returned vnode
4338 *
4339 * Returns: 0 Success
4340 * EINVAL The fd does not refer to a
4341 * vnode fileproc entry
4342 * fp_lookup:EBADF Bad file descriptor
4343 *
4344 * Implicit returns:
4345 * *vpp (modified) Returned vnode pointer
4346 *
4347 * Locks: This function internally takes and drops the proc_fdlock for
4348 * the current process
4349 *
4350 * Notes: If successful, this function increments the fp_iocount on the
4351 * fd's corresponding fileproc.
4352 *
4353 * The fileproc referenced is not returned; because of this, care
4354 * must be taken to not drop the last reference (e.g. by closing
4355 * the file). This is inherently unsafe, since the reference may
4356 * not be recoverable from the vnode, if there is a subsequent
4357 * close that destroys the associate fileproc. The caller should
4358 * therefore retain their own reference on the fileproc so that
4359 * the fp_iocount can be dropped subsequently. Failure to do this
4360 * can result in the returned pointer immediately becoming invalid
4361 * following the call.
4362 *
4363 * Use of this function is discouraged.
4364 */
4365 int
4366 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
4367 {
4368 struct fileproc *fp;
4369 int error;
4370
4371 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
4372 if (error == 0) {
4373 if (vpp) {
4374 *vpp = fp->f_data;
4375 }
4376 if (vidp) {
4377 *vidp = vnode_vid(fp->f_data);
4378 }
4379 }
4380 return error;
4381 }
4382
4383 /*
4384 * file_socket
4385 *
4386 * Description: Given an fd, look it up in the current process's per process
4387 * open file table, and return its internal socket pointer.
4388 *
4389 * Parameters: fd fd to obtain vnode from
4390 * sp pointer to socket return area
4391 *
4392 * Returns: 0 Success
4393 * ENOTSOCK Not a socket
4394 * fp_lookup:EBADF Bad file descriptor
4395 *
4396 * Implicit returns:
4397 * *sp (modified) Returned socket pointer
4398 *
4399 * Locks: This function internally takes and drops the proc_fdlock for
4400 * the current process
4401 *
4402 * Notes: If successful, this function increments the fp_iocount on the
4403 * fd's corresponding fileproc.
4404 *
4405 * The fileproc referenced is not returned; because of this, care
4406 * must be taken to not drop the last reference (e.g. by closing
4407 * the file). This is inherently unsafe, since the reference may
4408 * not be recoverable from the socket, if there is a subsequent
4409 * close that destroys the associate fileproc. The caller should
4410 * therefore retain their own reference on the fileproc so that
4411 * the fp_iocount can be dropped subsequently. Failure to do this
4412 * can result in the returned pointer immediately becoming invalid
4413 * following the call.
4414 *
4415 * Use of this function is discouraged.
4416 */
4417 int
4418 file_socket(int fd, struct socket **sp)
4419 {
4420 struct fileproc *fp;
4421 int error;
4422
4423 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
4424 if (error == 0) {
4425 if (sp) {
4426 *sp = (struct socket *)fp->f_data;
4427 }
4428 }
4429 return error;
4430 }
4431
4432
4433 /*
4434 * file_flags
4435 *
4436 * Description: Given an fd, look it up in the current process's per process
4437 * open file table, and return its fileproc's flags field.
4438 *
4439 * Parameters: fd fd whose flags are to be
4440 * retrieved
4441 * flags pointer to flags data area
4442 *
4443 * Returns: 0 Success
4444 * EBADF Bad file descriptor
4446 *
4447 * Implicit returns:
4448 * *flags (modified) Returned flags field
4449 *
4450 * Locks: This function internally takes and drops the proc_fdlock for
4451 * the current process
4452 */
4453 int
4454 file_flags(int fd, int *flags)
4455 {
4456 proc_t p = current_proc();
4457 struct fileproc *fp;
4458 int error = EBADF;
4459
4460 proc_fdlock_spin(p);
4461 fp = fp_get_noref_locked(p, fd);
4462 if (fp) {
4463 *flags = (int)fp->f_flag;
4464 error = 0;
4465 }
4466 proc_fdunlock(p);
4467
4468 return error;
4469 }
4470
4471
4472 /*
4473 * file_drop
4474 *
4475 * Description: Drop an iocount reference on an fd, and wake up any waiters
4476 * for draining (i.e. blocked in fileproc_drain() called during
4477 * the last attempt to close a file).
4478 *
4479 * Parameters: fd fd on which an ioreference is
4480 * to be dropped
4481 *
4482 * Returns: 0 Success
4483 *
4484 * Description: Given an fd, look it up in the current process's per process
4485 * open file table, and drop it's fileproc's fp_iocount by one
4486 *
4487 * Notes: This is intended as a corresponding operation to the functions
4488 * file_vnode() and file_socket() operations.
4489 *
4490 * If the caller can't possibly hold an I/O reference,
4491 * this function will panic the kernel rather than allowing
4492 * for memory corruption. Callers should always call this
4493 * because they acquired an I/O reference on this file before.
4494 *
4495 * Use of this function is discouraged.
4496 */
int
file_drop(int fd)
{
	struct fileproc *fp;
	proc_t p = current_proc();
	int needwakeup = 0;

	proc_fdlock_spin(p);
	/* Panics if the caller does not actually hold an I/O reference. */
	fp = fp_get_noref_locked_with_iocount(p, fd);

	/* Releasing the last I/O reference: clear state, note any waiter. */
	if (1 == os_ref_release_locked(&fp->fp_iocount)) {
		if (fp->fp_flags & FP_SELCONFLICT) {
			fp->fp_flags &= ~FP_SELCONFLICT;
		}

		if (p->p_fpdrainwait) {
			p->p_fpdrainwait = 0;
			needwakeup = 1;
		}
	}
	proc_fdunlock(p);

	/* The wakeup of any drain waiter happens after the lock is dropped. */
	if (needwakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return 0;
}
4524
4525
4526
4527 /*
4528 * falloc_withalloc
4529 *
4530 * Create a new open file structure and allocate
4531 * a file descriptor for the process that refers to it.
4532 *
4533 * Returns: 0 Success
4534 *
4535 * Description: Allocate an entry in the per process open file table and
4536 * return the corresponding fileproc and fd.
4537 *
4538 * Parameters: p The process in whose open file
4539 * table the fd is to be allocated
4540 * resultfp Pointer to fileproc pointer
4541 * return area
4542 * resultfd Pointer to fd return area
4543 * ctx VFS context
4544 * fp_zalloc fileproc allocator to use
4545 * crarg allocator args
4546 *
4547 * Returns: 0 Success
4548 * ENFILE Too many open files in system
4549 * fdalloc:EMFILE Too many open files in process
4550 * fdalloc:ENOMEM M_OFILETABL zone exhausted
4551 * ENOMEM fp_zone or fg_zone zone
4552 * exhausted
4553 *
4554 * Implicit returns:
4555 * *resultfp (modified) Returned fileproc pointer
4556 * *resultfd (modified) Returned fd
4557 *
4558 * Notes: This function takes separate process and context arguments
4559 * solely to support kern_exec.c; otherwise, it would take
4560 * neither, and use the vfs_context_current() routine internally.
4561 */
int
falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
    vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg)
{
	struct fileproc *fp;
	struct fileglob *fg;
	int error, nfd;

	/* Make sure we don't go beyond the system-wide limit */
	if (nfiles >= maxfiles) {
		tablefull("file");
		return ENFILE;
	}

	proc_fdlock(p);

	/* fdalloc will make sure the process stays below per-process limit */
	if ((error = fdalloc(p, 0, &nfd))) {
		proc_fdunlock(p);
		return error;
	}

#if CONFIG_MACF
	error = mac_file_check_create(proc_ucred(p));
	if (error) {
		proc_fdunlock(p);
		return error;
	}
#endif

	/*
	 * Allocate a new file descriptor.
	 * If the process has file descriptor zero open, add to the list
	 * of open files at that point, otherwise put it at the front of
	 * the list of open files.
	 */
	proc_fdunlock(p);

	/*
	 * The slot nfd is left UF_RESERVED by fdalloc(), which keeps it
	 * safe while we allocate the fileproc/fileglob unlocked.
	 */
	fp = (*fp_zalloc)(crarg);
	if (fp == NULL) {
		return ENOMEM;
	}
	fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);

	/*
	 * Take an extra I/O reference for the caller (the allocator
	 * presumably supplies the initial one — see fileproc_alloc_init),
	 * and initialize the fileglob's reference count.
	 */
	os_ref_retain_locked(&fp->fp_iocount);
	os_ref_init_raw(&fg->fg_count, &f_refgrp);
	fg->fg_ops = &uninitops;
	fp->fp_glob = fg;
#if CONFIG_MACF
	mac_file_label_init(fg);
#endif

	/* The fileproc holds a reference on the context's credential. */
	kauth_cred_ref(ctx->vc_ucred);

	fp->f_cred = ctx->vc_ucred;

#if CONFIG_MACF
	mac_file_label_associate(fp->f_cred, fg);
#endif

	os_atomic_inc(&nfiles, relaxed);

	proc_fdlock(p);

	/* Publish the fileproc in the previously reserved slot. */
	p->p_fd->fd_ofiles[nfd] = fp;

	proc_fdunlock(p);

	if (resultfp) {
		*resultfp = fp;
	}
	if (resultfd) {
		*resultfd = nfd;
	}

	return 0;
}
4640
int
falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
{
	/* Default allocation path: use the stock fileproc allocator. */
	return falloc_withalloc(p, resultfp, resultfd, ctx,
	    fileproc_alloc_init, NULL);
}
4647
4648 /*
4649 * fdexec
4650 *
4651 * Description: Perform close-on-exec processing for all files in a process
4652 * that are either marked as close-on-exec, or which were in the
4653 * process of being opened at the time of the execve
4654 *
4655 * Also handles the case (via posix_spawn()) where -all-
4656 * files except those marked with "inherit" as treated as
4657 * close-on-exec.
4658 *
4659 * Parameters: p Pointer to process calling
4660 * execve
4661 *
4662 * Returns: void
4663 *
4664 * Locks: This function internally takes and drops proc_fdlock()
4665 * But assumes tables don't grow/change while unlocked.
4666 *
4667 */
void
fdexec(proc_t p, short flags, int self_exec)
{
	struct filedesc *fdp = p->p_fd;
	int i;
	boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
	thread_t self = current_thread();
	struct uthread *ut = get_bsdthread_info(self);
	struct kqworkq *dealloc_kqwq = NULL;

	/*
	 * If the current thread is bound as a workq/workloop
	 * servicing thread, we need to unbind it first.
	 */
	if (ut->uu_kqr_bound && self_exec) {
		kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
	}

	proc_fdlock(p);

	/*
	 * Deallocate the knotes for this process
	 * and mark the tables non-existent so
	 * subsequent kqueue closes go faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/* Walk the table top-down, closing fds that must not survive exec. */
	for (i = fdp->fd_lastfile; i >= 0; i--) {
		struct fileproc *fp = fdp->fd_ofiles[i];
		char *flagp = &fdp->fd_ofileflags[i];

		if (fp && cloexec_default) {
			/*
			 * Reverse the usual semantics of file descriptor
			 * inheritance - all of them should be closed
			 * except files marked explicitly as "inherit" and
			 * not marked close-on-exec.
			 */
			if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
				*flagp |= UF_EXCLOSE;
			}
			*flagp &= ~UF_INHERIT;
		}

		if (
			((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
#if CONFIG_MACF
			|| (fp && mac_file_check_inherit(proc_ucred(p), fp->fp_glob))
#endif
			) {
			/* fp_close_and_unlock() drops the fdlock; retake it. */
			fp_close_and_unlock(p, i, fp, 0);
			proc_fdlock(p);
		}
	}

	/* release the per-process workq kq */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* Anything to free? */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
}
4738
4739
4740 /*
4741 * fdcopy
4742 *
4743 * Description: Copy a filedesc structure. This is normally used as part of
4744 * forkproc() when forking a new process, to copy the per process
4745 * open file table over to the new process.
4746 *
4747 * Parameters: p Process whose open file table
4748 * is to be copied (parent)
4749 * uth_cdir Per thread current working
4750 * cirectory, or NULL
4751 *
4752 * Returns: NULL Copy failed
4753 * !NULL Pointer to new struct filedesc
4754 *
4755 * Locks: This function internally takes and drops proc_fdlock()
4756 *
4757 * Notes: Files are copied directly, ignoring the new resource limits
4758 * for the process that's being copied into. Since the descriptor
4759 * references are just additional references, this does not count
4760 * against the number of open files on the system.
4761 *
4762 * The struct filedesc includes the current working directory,
4763 * and the current root directory, if the process is chroot'ed.
4764 *
4765 * If the exec was called by a thread using a per thread current
4766 * working directory, we inherit the working directory from the
4767 * thread making the call, rather than from the process.
4768 *
4769 * In the case of a failure to obtain a reference, for most cases,
4770 * the file entry will be silently dropped. There's an exception
4771 * for the case of a chroot dir, since a failure to to obtain a
4772 * reference there would constitute an "escape" from the chroot
4773 * environment, which must not be allowed. In that case, we will
4774 * deny the execve() operation, rather than allowing the escape.
4775 */
4776 struct filedesc *
4777 fdcopy(proc_t p, vnode_t uth_cdir)
4778 {
4779 struct filedesc *newfdp, *fdp = p->p_fd;
4780 int i;
4781 struct fileproc *ofp, *fp;
4782 vnode_t v_dir;
4783
4784 newfdp = zalloc(fdp_zone);
4785
4786 proc_fdlock(p);
4787
4788 /*
4789 * the FD_CHROOT flag will be inherited via this copy
4790 */
4791 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4792
4793 /*
4794 * If we are running with per-thread current working directories,
4795 * inherit the new current working directory from the current thread
4796 * instead, before we take our references.
4797 */
4798 if (uth_cdir != NULLVP) {
4799 newfdp->fd_cdir = uth_cdir;
4800 }
4801
4802 /*
4803 * For both fd_cdir and fd_rdir make sure we get
4804 * a valid reference... if we can't, than set
4805 * set the pointer(s) to NULL in the child... this
4806 * will keep us from using a non-referenced vp
4807 * and allows us to do the vnode_rele only on
4808 * a properly referenced vp
4809 */
4810 if ((v_dir = newfdp->fd_cdir)) {
4811 if (vnode_getwithref(v_dir) == 0) {
4812 if ((vnode_ref(v_dir))) {
4813 newfdp->fd_cdir = NULL;
4814 }
4815 vnode_put(v_dir);
4816 } else {
4817 newfdp->fd_cdir = NULL;
4818 }
4819 }
4820 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4821 /*
4822 * we couldn't get a new reference on
4823 * the current working directory being
4824 * inherited... we might as well drop
4825 * our reference from the parent also
4826 * since the vnode has gone DEAD making
4827 * it useless... by dropping it we'll
4828 * be that much closer to recycling it
4829 */
4830 vnode_rele(fdp->fd_cdir);
4831 fdp->fd_cdir = NULL;
4832 }
4833
4834 if ((v_dir = newfdp->fd_rdir)) {
4835 if (vnode_getwithref(v_dir) == 0) {
4836 if ((vnode_ref(v_dir))) {
4837 newfdp->fd_rdir = NULL;
4838 }
4839 vnode_put(v_dir);
4840 } else {
4841 newfdp->fd_rdir = NULL;
4842 }
4843 }
4844 /* Coming from a chroot environment and unable to get a reference... */
4845 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4846 proc_fdunlock(p);
4847 /*
4848 * We couldn't get a new reference on
4849 * the chroot directory being
4850 * inherited... this is fatal, since
4851 * otherwise it would constitute an
4852 * escape from a chroot environment by
4853 * the new process.
4854 */
4855 if (newfdp->fd_cdir) {
4856 vnode_rele(newfdp->fd_cdir);
4857 }
4858 zfree(fdp_zone, newfdp);
4859 return NULL;
4860 }
4861
4862 /*
4863 * If the number of open files fits in the internal arrays
4864 * of the open file structure, use them, otherwise allocate
4865 * additional memory for the number of descriptors currently
4866 * in use.
4867 */
4868 if (newfdp->fd_lastfile < NDFILE) {
4869 i = NDFILE;
4870 } else {
4871 /*
4872 * Compute the smallest multiple of NDEXTENT needed
4873 * for the file descriptors currently in use,
4874 * allowing the table to shrink.
4875 */
4876 i = newfdp->fd_nfiles;
4877 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
4878 i /= 2;
4879 }
4880 }
4881 proc_fdunlock(p);
4882
4883 MALLOC(newfdp->fd_ofiles, struct fileproc **,
4884 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4885 if (newfdp->fd_ofiles == NULL) {
4886 if (newfdp->fd_cdir) {
4887 vnode_rele(newfdp->fd_cdir);
4888 }
4889 if (newfdp->fd_rdir) {
4890 vnode_rele(newfdp->fd_rdir);
4891 }
4892
4893 zfree(fdp_zone, newfdp);
4894 return NULL;
4895 }
4896 (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4897 proc_fdlock(p);
4898
4899 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4900 newfdp->fd_nfiles = i;
4901
4902 if (fdp->fd_nfiles > 0) {
4903 struct fileproc **fpp;
4904 char *flags;
4905
4906 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4907 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4908 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4909 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4910
4911 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4912 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4913 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
4914 if ((ofp = *fpp) != NULL &&
4915 0 == (ofp->fp_glob->fg_lflags & FG_CONFINED) &&
4916 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
4917 #if DEBUG
4918 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
4919 panic("complex fileproc");
4920 }
4921 #endif
4922 fp = fileproc_alloc_init(NULL);
4923 if (fp == NULL) {
4924 /*
4925 * XXX no room to copy, unable to
4926 * XXX safely unwind state at present
4927 */
4928 *fpp = NULL;
4929 } else {
4930 fp->fp_flags |=
4931 (ofp->fp_flags & ~FP_TYPEMASK);
4932 fp->fp_glob = ofp->fp_glob;
4933 fg_ref(p, fp->fp_glob);
4934 *fpp = fp;
4935 }
4936 } else {
4937 *fpp = NULL;
4938 *flags = 0;
4939 }
4940 if (*fpp == NULL) {
4941 if (i == newfdp->fd_lastfile && i > 0) {
4942 newfdp->fd_lastfile--;
4943 }
4944 if (i < newfdp->fd_freefile) {
4945 newfdp->fd_freefile = i;
4946 }
4947 }
4948 }
4949 }
4950
4951 proc_fdunlock(p);
4952
4953 /*
4954 * Initialize knote and kqueue tracking structs
4955 */
4956 newfdp->fd_knlist = NULL;
4957 newfdp->fd_knlistsize = 0;
4958 newfdp->fd_knhash = NULL;
4959 newfdp->fd_knhashmask = 0;
4960 newfdp->fd_kqhash = NULL;
4961 newfdp->fd_kqhashmask = 0;
4962 newfdp->fd_wqkqueue = NULL;
4963 lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
4964 lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
4965
4966 return newfdp;
4967 }
4968
4969
4970 /*
4971 * fdfree
4972 *
4973 * Description: Release a filedesc (per process open file table) structure;
4974 * this is done on process exit(), or from forkproc_free() if
4975 * the fork fails for some reason subsequent to a successful
4976 * call to fdcopy()
4977 *
4978 * Parameters: p Pointer to process going away
4979 *
4980 * Returns: void
4981 *
4982 * Locks: This function internally takes and drops proc_fdlock()
4983 */
void
fdfree(proc_t p)
{
	struct filedesc *fdp;
	struct fileproc *fp;
	struct kqworkq *dealloc_kqwq = NULL;
	int i;

	proc_fdlock(p);

	/* Nothing to do for the kernel proc, or if the table is already gone. */
	if (p == kernproc || NULL == (fdp = p->p_fd)) {
		proc_fdunlock(p);
		return;
	}

	extern struct filedesc filedesc0;

	/*
	 * filedesc0 is the statically allocated bootstrap table; it must
	 * never be torn down through this path.
	 */
	if (&filedesc0 == fdp) {
		panic("filedesc0");
	}

	/*
	 * deallocate all the knotes up front and claim empty
	 * tables to make any subsequent kqueue closes faster.
	 */
	knotes_dealloc(p);
	assert(fdp->fd_knlistsize == 0);
	assert(fdp->fd_knhashmask == 0);

	/*
	 * dealloc all workloops that have outstanding retains
	 * when created with scheduling parameters.
	 */
	kqworkloops_dealloc(p);

	/* close file descriptors */
	if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
		for (i = fdp->fd_lastfile; i >= 0; i--) {
			if ((fp = fdp->fd_ofiles[i]) != NULL) {
				if (fdp->fd_ofileflags[i] & UF_RESERVED) {
					panic("fdfree: found fp with UF_RESERVED");
				}
				/*
				 * fp_close_and_unlock() drops the fdlock, so
				 * retake it before examining the next slot.
				 */
				fp_close_and_unlock(p, i, fp, 0);
				proc_fdlock(p);
			}
		}
		FREE(fdp->fd_ofiles, M_OFILETABL);
		fdp->fd_ofiles = NULL;
		fdp->fd_nfiles = 0;
	}

	/* Detach the workq kqueue under the lock; free it after dropping it. */
	if (fdp->fd_wqkqueue) {
		dealloc_kqwq = fdp->fd_wqkqueue;
		fdp->fd_wqkqueue = NULL;
	}

	proc_fdunlock(p);

	/* The deallocations below may block, so they happen unlocked. */
	if (dealloc_kqwq) {
		kqworkq_dealloc(dealloc_kqwq);
	}
	if (fdp->fd_cdir) {
		vnode_rele(fdp->fd_cdir);
	}
	if (fdp->fd_rdir) {
		vnode_rele(fdp->fd_rdir);
	}

	/* Detach the filedesc from the proc before final teardown. */
	proc_fdlock_spin(p);
	p->p_fd = NULL;
	proc_fdunlock(p);

	/* knotes_dealloc() above should have emptied every hash chain. */
	if (fdp->fd_kqhash) {
		for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
			assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
		}
		hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
	}

	lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
	lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);

	zfree(fdp_zone, fdp);
}
5068
5069 /*
5070 * fileproc_drain
5071 *
5072 * Description: Drain out pending I/O operations
5073 *
5074 * Parameters: p Process closing this file
5075 * fp fileproc struct for the open
5076 * instance on the file
5077 *
5078 * Returns: void
5079 *
5080 * Locks: Assumes the caller holds the proc_fdlock
5081 *
5082 * Notes: For character devices, this occurs on the last close of the
5083 * device; for all other file descriptors, this occurs on each
5084 * close to prevent fd's from being closed out from under
5085 * operations currently in progress and blocked
5086 *
5087 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5088 * regarding their use and interaction with this function.
5089 */
void
fileproc_drain(proc_t p, struct fileproc * fp)
{
	struct vfs_context context;
	thread_t thread;
	bool is_current_proc;

	is_current_proc = (p == current_proc());

	if (!is_current_proc) {
		/*
		 * Draining on behalf of another proc: borrow one of its
		 * threads for the VFS context and hold a reference on it
		 * until the drain is finished.
		 */
		proc_lock(p);
		thread = proc_thread(p); /* XXX */
		thread_reference(thread);
		proc_unlock(p);
	} else {
		thread = current_thread();
	}

	context.vc_thread = thread;
	context.vc_ucred = fp->fp_glob->fg_cred;

	/* Set the vflag for drain */
	fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);

	/* Loop until this caller holds the only remaining I/O reference. */
	while (os_ref_get_count(&fp->fp_iocount) > 1) {
		lck_mtx_convert_spin(&p->p_fdmlock);

		fo_drain(fp, &context);
		if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
			/* Kick any selects blocked on this fp's wait set. */
			if (waitq_wakeup64_all((struct waitq *)fp->fp_wset, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->fp_wset, fp);
			}
		}
		if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
			/* Also wake waiters parked on the shared conflict queue. */
			if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
			    THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
				panic("bad select_conflict_queue");
			}
		}
		p->p_fpdrainwait = 1;

		/* Sleep until an in-flight operation drops its iocount. */
		msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
	}
#if DIAGNOSTIC
	if ((fp->fp_flags & FP_INSELECT) != 0) {
		panic("FP_INSELECT set on drained fp");
	}
#endif
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		fp->fp_flags &= ~FP_SELCONFLICT;
	}

	if (!is_current_proc) {
		thread_deallocate(thread);
	}
}
5147
5148
5149 /*
5150 * fp_free
5151 *
5152 * Description: Release the fd and free the fileproc associated with the fd
5153 * in the per process open file table of the specified process;
5154 * these values must correspond.
5155 *
5156 * Parameters: p Process containing fd
5157 * fd fd to be released
5158 * fp fileproc to be freed
5159 */
void
fp_free(proc_t p, int fd, struct fileproc * fp)
{
	/* Release the descriptor slot first, under the fd lock. */
	proc_fdlock_spin(p);
	fdrelse(p, fd);
	proc_fdunlock(p);

	/*
	 * Then free the fileglob and the fileproc.  The extra iocount
	 * reference is dropped before fileproc_free() releases the
	 * final (allocation) reference.
	 */
	fg_free(fp->fp_glob);
	os_ref_release_live(&fp->fp_iocount);
	fileproc_free(fp);
}
5171
5172
5173 /*
5174 * sys_flock
5175 *
5176 * Description: Apply an advisory lock on a file descriptor.
5177 *
5178 * Parameters: p Process making request
5179 * uap->fd fd on which the lock is to be
5180 * attempted
5181 * uap->how (Un)Lock bits, including type
5182 * retval Pointer to the call return area
5183 *
5184 * Returns: 0 Success
5185 * fp_getfvp:EBADF Bad file descriptor
5186 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5187 * vnode_getwithref:???
5188 * VNOP_ADVLOCK:???
5189 *
5190 * Implicit returns:
5191 * *retval (modified) Size of dtable
5192 *
5193 * Notes: Just attempt to get a record lock of the requested type on
5194 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5195 */
5196 int
5197 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5198 {
5199 int fd = uap->fd;
5200 int how = uap->how;
5201 struct fileproc *fp;
5202 struct vnode *vp;
5203 struct flock lf;
5204 vfs_context_t ctx = vfs_context_current();
5205 int error = 0;
5206
5207 AUDIT_ARG(fd, uap->fd);
5208 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5209 return error;
5210 }
5211 if ((error = vnode_getwithref(vp))) {
5212 goto out1;
5213 }
5214 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5215
5216 lf.l_whence = SEEK_SET;
5217 lf.l_start = 0;
5218 lf.l_len = 0;
5219 if (how & LOCK_UN) {
5220 lf.l_type = F_UNLCK;
5221 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5222 goto out;
5223 }
5224 if (how & LOCK_EX) {
5225 lf.l_type = F_WRLCK;
5226 } else if (how & LOCK_SH) {
5227 lf.l_type = F_RDLCK;
5228 } else {
5229 error = EBADF;
5230 goto out;
5231 }
5232 #if CONFIG_MACF
5233 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob, F_SETLK, &lf);
5234 if (error) {
5235 goto out;
5236 }
5237 #endif
5238 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5239 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5240 ctx, NULL);
5241 if (!error) {
5242 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5243 }
5244 out:
5245 (void)vnode_put(vp);
5246 out1:
5247 fp_drop(p, fd, fp, 0);
5248 return error;
5249 }
5250
5251 /*
5252 * sys_fileport_makeport
5253 *
5254 * Description: Obtain a Mach send right for a given file descriptor.
5255 *
5256 * Parameters: p Process calling fileport
5257 * uap->fd The fd to reference
5258 * uap->portnamep User address at which to place port name.
5259 *
5260 * Returns: 0 Success.
5261 * EBADF Bad file descriptor.
5262 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5263 * EFAULT Address at which to store port name is not valid.
5264 * EAGAIN Resource shortage.
5265 *
5266 * Implicit returns:
5267 * On success, name of send right is stored at user-specified address.
5268 */
int
sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
    __unused int *retval)
{
	int err;
	int fd = uap->fd;
	user_addr_t user_portaddr = uap->portnamep;
	struct fileproc *fp = FILEPROC_NULL;
	struct fileglob *fg = NULL;
	ipc_port_t fileport;
	mach_port_name_t name = MACH_PORT_NULL;

	proc_fdlock(p);
	err = fp_lookup(p, fd, &fp, 1);
	if (err != 0) {
		goto out_unlock;
	}

	fg = fp->fp_glob;
	/* Some file types cannot be transported in a Mach message. */
	if (!fg_sendable(fg)) {
		err = EINVAL;
		goto out_unlock;
	}

	/* Guarded descriptors may forbid fileport creation entirely. */
	if (fp_isguarded(fp, GUARD_FILEPORT)) {
		err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
		goto out_unlock;
	}

	/* Dropped when port is deallocated */
	fg_ref(p, fg);

	proc_fdunlock(p);

	/* Allocate and initialize a port */
	fileport = fileport_alloc(fg);
	if (fileport == IPC_PORT_NULL) {
		fg_drop_live(fg);
		err = EAGAIN;
		goto out;
	}

	/* Add an entry. Deallocates port on failure. */
	name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
	if (!MACH_PORT_VALID(name)) {
		err = EINVAL;
		goto out;
	}

	err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
	if (err != 0) {
		goto out;
	}

	/* Tag the fileglob for debugging purposes */
	lck_mtx_lock_spin(&fg->fg_lock);
	fg->fg_lflags |= FG_PORTMADE;
	lck_mtx_unlock(&fg->fg_lock);

	fp_drop(p, fd, fp, 0);

	return 0;

out_unlock:
	proc_fdunlock(p);
out:
	/*
	 * If the send right was already placed in the task's IPC space
	 * (copyout of the name failed), take it back.
	 */
	if (MACH_PORT_VALID(name)) {
		/* Don't care if another thread races us to deallocate the entry */
		(void) mach_port_deallocate(get_task_ipcspace(p->task), name);
	}

	if (fp != FILEPROC_NULL) {
		fp_drop(p, fd, fp, 0);
	}

	return err;
}
5346
/*
 * Destruction callback for a fileport: drop the fileglob reference taken
 * when the port was created.  There is no owning proc at this point.
 */
void
fileport_releasefg(struct fileglob *fg)
{
	(void)fg_drop(PROC_NULL, fg);
}
5352
5353 /*
5354 * fileport_makefd
5355 *
5356 * Description: Obtain the file descriptor for a given Mach send right.
5357 *
5358 * Returns: 0 Success
5359 * EINVAL Invalid Mach port name, or port is not for a file.
5360 * fdalloc:EMFILE
5361 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5362 *
5363 * Implicit returns:
5364 * *retval (modified) The new descriptor
5365 */
5366 int
5367 fileport_makefd(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5368 {
5369 struct fileglob *fg;
5370 struct fileproc *fp = FILEPROC_NULL;
5371 int fd;
5372 int err;
5373
5374 fg = fileport_port_to_fileglob(port);
5375 if (fg == NULL) {
5376 err = EINVAL;
5377 goto out;
5378 }
5379
5380 fp = fileproc_alloc_init(NULL);
5381 if (fp == FILEPROC_NULL) {
5382 err = ENOMEM;
5383 goto out;
5384 }
5385
5386 proc_fdlock(p);
5387 err = fdalloc(p, 0, &fd);
5388 if (err != 0) {
5389 proc_fdunlock(p);
5390 goto out;
5391 }
5392 if (uf_flags) {
5393 *fdflags(p, fd) |= uf_flags;
5394 }
5395
5396 fp->fp_glob = fg;
5397 fg_ref(p, fg);
5398
5399 procfdtbl_releasefd(p, fd, fp);
5400 proc_fdunlock(p);
5401
5402 *retval = fd;
5403 err = 0;
5404 out:
5405 if ((fp != NULL) && (0 != err)) {
5406 fileproc_free(fp);
5407 }
5408
5409 return err;
5410 }
5411
5412 /*
5413 * sys_fileport_makefd
5414 *
5415 * Description: Obtain the file descriptor for a given Mach send right.
5416 *
5417 * Parameters: p Process calling fileport
5418 * uap->port Name of send right to file port.
5419 *
5420 * Returns: 0 Success
5421 * EINVAL Invalid Mach port name, or port is not for a file.
5422 * fdalloc:EMFILE
5423 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5424 *
5425 * Implicit returns:
5426 * *retval (modified) The new descriptor
5427 */
5428 int
5429 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5430 {
5431 ipc_port_t port = IPC_PORT_NULL;
5432 mach_port_name_t send = uap->port;
5433 kern_return_t res;
5434 int err;
5435
5436 res = ipc_object_copyin(get_task_ipcspace(p->task),
5437 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5438
5439 if (res == KERN_SUCCESS) {
5440 err = fileport_makefd(p, port, UF_EXCLOSE, retval);
5441 } else {
5442 err = EINVAL;
5443 }
5444
5445 if (IPC_PORT_NULL != port) {
5446 ipc_port_release_send(port);
5447 }
5448
5449 return err;
5450 }
5451
5452
5453 /*
5454 * dupfdopen
5455 *
5456 * Description: Duplicate the specified descriptor to a free descriptor;
5457 * this is the second half of fdopen(), above.
5458 *
5459 * Parameters: fdp filedesc pointer to fill in
5460 * indx fd to dup to
5461 * dfd fd to dup from
5462 * mode mode to set on new fd
5463 * error command code
5464 *
5465 * Returns: 0 Success
5466 * EBADF Source fd is bad
5467 * EACCES Requested mode not allowed
5468 * !0 'error', if not ENODEV or
5469 * ENXIO
5470 *
5471 * Notes: XXX This is not thread safe; see fdopen() above
5472 */
5473 int
5474 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5475 {
5476 struct fileproc *wfp;
5477 struct fileproc *fp;
5478 #if CONFIG_MACF
5479 int myerror;
5480 #endif
5481 proc_t p = current_proc();
5482
5483 /*
5484 * If the to-be-dup'd fd number is greater than the allowed number
5485 * of file descriptors, or the fd to be dup'd has already been
5486 * closed, reject. Note, check for new == old is necessary as
5487 * falloc could allocate an already closed to-be-dup'd descriptor
5488 * as the new descriptor.
5489 */
5490 proc_fdlock(p);
5491
5492 fp = fdp->fd_ofiles[indx];
5493 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5494 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5495 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5496 proc_fdunlock(p);
5497 return EBADF;
5498 }
5499 #if CONFIG_MACF
5500 myerror = mac_file_check_dup(proc_ucred(p), wfp->fp_glob, dfd);
5501 if (myerror) {
5502 proc_fdunlock(p);
5503 return myerror;
5504 }
5505 #endif
5506 /*
5507 * There are two cases of interest here.
5508 *
5509 * For ENODEV simply dup (dfd) to file descriptor
5510 * (indx) and return.
5511 *
5512 * For ENXIO steal away the file structure from (dfd) and
5513 * store it in (indx). (dfd) is effectively closed by
5514 * this operation.
5515 *
5516 * Any other error code is just returned.
5517 */
5518 switch (error) {
5519 case ENODEV:
5520 if (fp_isguarded(wfp, GUARD_DUP)) {
5521 proc_fdunlock(p);
5522 return EPERM;
5523 }
5524
5525 /*
5526 * Check that the mode the file is being opened for is a
5527 * subset of the mode of the existing descriptor.
5528 */
5529 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5530 proc_fdunlock(p);
5531 return EACCES;
5532 }
5533 if (indx > fdp->fd_lastfile) {
5534 fdp->fd_lastfile = indx;
5535 }
5536
5537 if (fp->fp_glob) {
5538 fg_free(fp->fp_glob);
5539 }
5540 fg_ref(p, wfp->fp_glob);
5541 fp->fp_glob = wfp->fp_glob;
5542
5543 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5544 (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
5545
5546 proc_fdunlock(p);
5547 return 0;
5548
5549 default:
5550 proc_fdunlock(p);
5551 return error;
5552 }
5553 /* NOTREACHED */
5554 }
5555
5556
5557 /*
5558 * fo_read
5559 *
5560 * Description: Generic fileops read indirected through the fileops pointer
5561 * in the fileproc structure
5562 *
5563 * Parameters: fp fileproc structure pointer
5564 * uio user I/O structure pointer
5565 * flags FOF_ flags
5566 * ctx VFS context for operation
5567 *
5568 * Returns: 0 Success
5569 * !0 Errno from read
5570 */
5571 int
5572 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5573 {
5574 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5575 }
5576
5577 int
5578 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5579 {
5580 #pragma unused(fp, uio, flags, ctx)
5581 return ENXIO;
5582 }
5583
5584
5585 /*
5586 * fo_write
5587 *
5588 * Description: Generic fileops write indirected through the fileops pointer
5589 * in the fileproc structure
5590 *
5591 * Parameters: fp fileproc structure pointer
5592 * uio user I/O structure pointer
5593 * flags FOF_ flags
5594 * ctx VFS context for operation
5595 *
5596 * Returns: 0 Success
5597 * !0 Errno from write
5598 */
5599 int
5600 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5601 {
5602 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5603 }
5604
5605 int
5606 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5607 {
5608 #pragma unused(fp, uio, flags, ctx)
5609 return ENXIO;
5610 }
5611
5612
5613 /*
5614 * fo_ioctl
5615 *
5616 * Description: Generic fileops ioctl indirected through the fileops pointer
5617 * in the fileproc structure
5618 *
5619 * Parameters: fp fileproc structure pointer
5620 * com ioctl command
5621 * data pointer to internalized copy
5622 * of user space ioctl command
5623 * parameter data in kernel space
5624 * ctx VFS context for operation
5625 *
5626 * Returns: 0 Success
5627 * !0 Errno from ioctl
5628 *
5629 * Locks: The caller is assumed to have held the proc_fdlock; this
5630 * function releases and reacquires this lock. If the caller
5631 * accesses data protected by this lock prior to calling this
5632 * function, it will need to revalidate/reacquire any cached
5633 * protected data obtained prior to the call.
5634 */
5635 int
5636 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5637 {
5638 int error;
5639
5640 proc_fdunlock(vfs_context_proc(ctx));
5641 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5642 proc_fdlock(vfs_context_proc(ctx));
5643 return error;
5644 }
5645
5646 int
5647 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5648 {
5649 #pragma unused(fp, com, data, ctx)
5650 return ENOTTY;
5651 }
5652
5653
5654 /*
5655 * fo_select
5656 *
5657 * Description: Generic fileops select indirected through the fileops pointer
5658 * in the fileproc structure
5659 *
5660 * Parameters: fp fileproc structure pointer
5661 * which select which
5662 * wql pointer to wait queue list
5663 * ctx VFS context for operation
5664 *
5665 * Returns: 0 Success
5666 * !0 Errno from select
5667 */
5668 int
5669 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5670 {
5671 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5672 }
5673
5674 int
5675 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5676 {
5677 #pragma unused(fp, which, wql, ctx)
5678 return ENOTSUP;
5679 }
5680
5681
5682 /*
5683 * fo_close
5684 *
 * Description: Generic fileops close indirected through the fileops pointer
 *		in the fileglob structure
 *
 * Parameters:	fg		fileglob structure pointer for
 *				file to close
 *		ctx		VFS context for operation
5691 *
5692 * Returns: 0 Success
5693 * !0 Errno from close
5694 */
5695 int
5696 fo_close(struct fileglob *fg, vfs_context_t ctx)
5697 {
5698 return (*fg->fg_ops->fo_close)(fg, ctx);
5699 }
5700
5701
5702 /*
5703 * fo_drain
5704 *
 * Description: Generic fileops drain indirected through the fileops
 *		pointer in the fileproc structure
5707 *
5708 * Parameters: fp fileproc structure pointer
5709 * ctx VFS context for operation
5710 *
5711 * Returns: 0 Success
5712 * !0 errno from drain
5713 */
5714 int
5715 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5716 {
5717 return (*fp->f_ops->fo_drain)(fp, ctx);
5718 }
5719
5720 int
5721 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5722 {
5723 #pragma unused(fp, ctx)
5724 return ENOTSUP;
5725 }
5726
5727
5728 /*
5729 * fo_kqfilter
5730 *
5731 * Description: Generic fileops kqueue filter indirected through the fileops
5732 * pointer in the fileproc structure
5733 *
5734 * Parameters: fp fileproc structure pointer
5735 * kn pointer to knote to filter on
5736 *
5737 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5738 * 0 Filter is not active
5739 * !0 Filter is active
5740 */
5741 int
5742 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5743 {
5744 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5745 }
5746
5747 int
5748 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5749 {
5750 #pragma unused(fp, kev)
5751 knote_set_error(kn, ENOTSUP);
5752 return 0;
5753 }
5754
5755
5756 struct fileproc *
5757 fileproc_alloc_init(__unused void *arg)
5758 {
5759 struct fileproc *fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO);
5760
5761 os_ref_init(&fp->fp_iocount, &f_refgrp);
5762 return fp;
5763 }
5764
5765
5766 void
5767 fileproc_free(struct fileproc *fp)
5768 {
5769 os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
5770 #if DEVELOPMENT || DEBUG
5771 if (0 != refc) {
5772 panic("%s: pid %d refc: %u != 0",
5773 __func__, proc_pid(current_proc()), refc);
5774 }
5775 #endif
5776 switch (FILEPROC_TYPE(fp)) {
5777 case FTYPE_SIMPLE:
5778 zfree(fp_zone, fp);
5779 break;
5780 case FTYPE_GUARDED:
5781 guarded_fileproc_free(fp);
5782 break;
5783 default:
5784 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->fp_flags);
5785 }
5786 }
5787
5788 void
5789 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
5790 {
5791 if (clearflags) {
5792 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
5793 } else {
5794 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
5795 }
5796 }
5797
5798 fileproc_vflags_t
5799 fileproc_get_vflags(struct fileproc *fp)
5800 {
5801 return os_atomic_load(&fp->fp_vflags, relaxed);
5802 }