1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/kauth.h>
82 #include <sys/file_internal.h>
83 #include <sys/guarded.h>
84 #include <sys/priv.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/stat.h>
88 #include <sys/ioctl.h>
89 #include <sys/fcntl.h>
90 #include <sys/fsctl.h>
91 #include <sys/malloc.h>
92 #include <sys/mman.h>
93 #include <sys/syslog.h>
94 #include <sys/unistd.h>
95 #include <sys/resourcevar.h>
96 #include <sys/aio_kern.h>
97 #include <sys/ev.h>
98 #include <kern/locks.h>
99 #include <sys/uio_internal.h>
100 #include <sys/codesign.h>
101 #include <sys/codedir_internal.h>
102 #include <sys/mount_internal.h>
103 #include <sys/kdebug.h>
104 #include <sys/sysproto.h>
105 #include <sys/pipe.h>
106 #include <sys/spawn.h>
107 #include <sys/cprotect.h>
108 #include <sys/ubc_internal.h>
109
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/waitq.h>
113 #include <kern/ipc_misc.h>
114
115 #include <vm/vm_protos.h>
116 #include <mach/mach_port.h>
117
118 #include <security/audit/audit.h>
119 #if CONFIG_MACF
120 #include <security/mac_framework.h>
121 #endif
122
123 #include <stdbool.h>
124 #include <os/atomic_private.h>
125 #include <IOKit/IOBSD.h>
126
127 #define IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND 0x1
128 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
129 mach_msg_type_name_t, ipc_port_t *, mach_port_context_t, mach_msg_guard_flags_t *, uint32_t);
130 void ipc_port_release_send(ipc_port_t);
131
132 static void fileproc_drain(proc_t, struct fileproc *);
133 static int finishdup(proc_t p,
134 struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
135
136 void fileport_releasefg(struct fileglob *fg);
137
138 /* flags for fp_close_and_unlock */
139 #define FD_DUP2RESV 1
140
141 /* We don't want these exported */
142
143 __private_extern__
144 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
145
146 static void fdrelse(struct proc * p, int fd);
147
148 extern void file_lock_init(void);
149
150 extern kauth_scope_t kauth_scope_fileop;
151
152 /* Conflict wait queue for when selects collide (opaque type) */
153 extern struct waitq select_conflict_queue;
154
155 #ifndef HFS_GET_BOOT_INFO
156 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
157 #endif
158
159 #ifndef HFS_SET_BOOT_INFO
160 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
161 #endif
162
163 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
164 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
165 #endif
166
167 #define f_flag fp_glob->fg_flag
168 #define f_type fp_glob->fg_ops->fo_type
169 #define f_cred fp_glob->fg_cred
170 #define f_ops fp_glob->fg_ops
171 #define f_offset fp_glob->fg_offset
172 #define f_data fp_glob->fg_data
173 #define CHECK_ADD_OVERFLOW_INT64L(x, y) \
174 (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \
175 (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \
176 ? 1 : 0)
177
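/*
 * Illustrative note on the macro above: it flags signed 64-bit addition
 * overflow without performing the addition. For example, with
 * x = LLONG_MAX - 5 and y = 10, both operands are positive and
 * x > LLONG_MAX - y, so the macro evaluates to 1; with x = 5 and y = 10
 * it evaluates to 0 because the sum fits in an int64_t.
 */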
178 ZONE_DECLARE(fg_zone, "fileglob",
179 sizeof(struct fileglob), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
180 ZONE_DECLARE(fp_zone, "fileproc",
181 sizeof(struct fileproc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
182 ZONE_DECLARE(fdp_zone, "filedesc",
183 sizeof(struct filedesc), ZC_NOENCRYPT | ZC_ZFREE_CLEARMEM);
184
185 /*
186 * Descriptor management.
187 */
188 int nfiles; /* actual number of open files */
189 /*
190 * "uninitialized" ops -- ensure FILEGLOB_DTYPE(fg) always exists
191 */
192 static const struct fileops uninitops;
193
194 os_refgrp_decl(, f_refgrp, "files refcounts", NULL);
195 lck_grp_attr_t * file_lck_grp_attr;
196 lck_grp_t * file_lck_grp;
197 lck_attr_t * file_lck_attr;
198
199 #pragma mark fileglobs
200
201 /*!
202 * @function fg_free
203 *
204 * @brief
205 * Free a file structure.
206 */
207 static void
208 fg_free(struct fileglob *fg)
209 {
210 os_atomic_dec(&nfiles, relaxed);
211
212 if (fg->fg_vn_data) {
213 fg_vn_data_free(fg->fg_vn_data);
214 fg->fg_vn_data = NULL;
215 }
216
217 if (IS_VALID_CRED(fg->fg_cred)) {
218 kauth_cred_unref(&fg->fg_cred);
219 }
220 lck_mtx_destroy(&fg->fg_lock, file_lck_grp);
221
222 #if CONFIG_MACF
223 mac_file_label_destroy(fg);
224 #endif
225 zfree(fg_zone, fg);
226 }
227
228 OS_ALWAYS_INLINE
229 void
230 fg_ref(struct fileglob *fg)
231 {
232 os_ref_retain_raw(&fg->fg_count, &f_refgrp);
233 }
234
235 int
236 fg_drop(proc_t p, struct fileglob *fg)
237 {
238 struct vnode *vp;
239 struct vfs_context context;
240 int error = 0;
241
242 if (fg == NULL) {
243 return 0;
244 }
245
246 /* Set up context with cred stashed in fg */
247 if (p == current_proc()) {
248 context.vc_thread = current_thread();
249 } else {
250 context.vc_thread = NULL;
251 }
252 context.vc_ucred = fg->fg_cred;
253
254 /*
255 * POSIX record locking dictates that any close releases ALL
256 * locks owned by this process. This is handled by setting
257 * a flag in the unlock to free ONLY locks obeying POSIX
258 * semantics, and not to free BSD-style file locks.
259 * If the descriptor was in a message, POSIX-style locks
260 * aren't passed with the descriptor.
261 */
262 if (p && DTYPE_VNODE == FILEGLOB_DTYPE(fg) &&
263 (p->p_ladvflag & P_LADVLOCK)) {
264 struct flock lf = {
265 .l_whence = SEEK_SET,
266 .l_type = F_UNLCK,
267 };
268
269 vp = (struct vnode *)fg->fg_data;
270 if ((error = vnode_getwithref(vp)) == 0) {
271 (void)VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL);
272 (void)vnode_put(vp);
273 }
274 }
275
276 if (os_ref_release_raw(&fg->fg_count, &f_refgrp) == 0) {
277 /*
278 * Since we ensure that fg->fg_ops is always initialized,
279 * it is safe to invoke fo_close on the fg
280 */
281 error = fo_close(fg, &context);
282
283 fg_free(fg);
284 }
285
286 return error;
287 }
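
/*
 * Usage sketch for illustration only: a subsystem that stashes a fileglob
 * pointer takes a reference with fg_ref() and later balances it with
 * fg_drop(), which invokes fo_close() and frees the fileglob once the last
 * reference is gone. The function and variable names below are hypothetical,
 * not xnu interfaces.
 */
#if 0
static void
stash_fileglob_example(proc_t p, struct fileproc *fp, struct fileglob **slot)
{
	struct fileglob *fg = fp->fp_glob;

	fg_ref(fg);                      /* keep the fileglob alive past this fileproc */
	*slot = fg;

	/* ... later, when the stashed reference is no longer needed ... */
	(void)fg_drop(p, *slot);         /* may close the file on the last reference */
	*slot = NULL;
}
#endif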
288
289 /*
290 * fg_get_vnode
291 *
292 * Description: Return vnode associated with the file structure, if
293 * any. The lifetime of the returned vnode is bound to
294 * the lifetime of the file structure.
295 *
296 * Parameters: fg Pointer to fileglob to
297 * inspect
298 *
299 * Returns: vnode_t
300 */
301 vnode_t
302 fg_get_vnode(struct fileglob *fg)
303 {
304 if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
305 return (vnode_t)fg->fg_data;
306 } else {
307 return NULL;
308 }
309 }
310
311 bool
312 fg_sendable(struct fileglob *fg)
313 {
314 switch (FILEGLOB_DTYPE(fg)) {
315 case DTYPE_VNODE:
316 case DTYPE_SOCKET:
317 case DTYPE_PIPE:
318 case DTYPE_PSXSHM:
319 case DTYPE_NETPOLICY:
320 return (fg->fg_lflags & FG_CONFINED) == 0;
321
322 default:
323 return false;
324 }
325 }
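
/*
 * Illustrative sketch only: a caller that wants to hand a fileglob to
 * another process (e.g. over a fileport) would reject types that
 * fg_sendable() refuses, and can use fg_get_vnode() to peek at the backing
 * vnode when one exists. The helper name below is hypothetical.
 */
#if 0
static int
can_transfer_example(struct fileglob *fg)
{
	if (!fg_sendable(fg)) {
		return EPERM;            /* confined or unsupported descriptor type */
	}
	if (fg_get_vnode(fg) != NULL) {
		/* vnode-backed: additional policy checks could go here */
	}
	return 0;
}
#endif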
326
327
328 #pragma mark fileprocs
329
330 /*
331 * check_file_seek_range
332 *
333 * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX.
334 *
335 * Parameters: fl Flock structure.
336 * cur_file_offset Current offset in the file.
337 *
338 * Returns: 0 on Success.
339 * EOVERFLOW on overflow.
340 * EINVAL on offset less than zero.
341 */
342
343 static int
344 check_file_seek_range(struct flock *fl, off_t cur_file_offset)
345 {
346 if (fl->l_whence == SEEK_CUR) {
347 /* Check if the start marker is beyond LLONG_MAX. */
348 if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) {
349 /* Check if start marker is negative */
350 if (fl->l_start < 0) {
351 return EINVAL;
352 }
353 return EOVERFLOW;
354 }
355 /* Check if the start marker is negative. */
356 if (fl->l_start + cur_file_offset < 0) {
357 return EINVAL;
358 }
359 /* Check if end marker is beyond LLONG_MAX. */
360 if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start +
361 cur_file_offset, fl->l_len - 1))) {
362 return EOVERFLOW;
363 }
364 /* Check if the end marker is negative. */
365 if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset +
366 fl->l_len < 0)) {
367 return EINVAL;
368 }
369 } else if (fl->l_whence == SEEK_SET) {
370 /* Check if the start marker is negative. */
371 if (fl->l_start < 0) {
372 return EINVAL;
373 }
374 /* Check if the end marker is beyond LLONG_MAX. */
375 if ((fl->l_len > 0) &&
376 CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) {
377 return EOVERFLOW;
378 }
379 /* Check if the end marker is negative. */
380 if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) {
381 return EINVAL;
382 }
383 }
384 return 0;
385 }
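
/*
 * Worked example for the checks above: with l_whence = SEEK_CUR,
 * l_start = LLONG_MAX - 10 and a current file offset of 100, the start
 * marker overflows and the routine returns EOVERFLOW; with l_start = -200
 * and an offset of 100 the effective start is negative, so it returns
 * EINVAL. A zero-length SEEK_SET lock starting at offset 0 passes all checks.
 */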
386
387
388 /*
389 * file_lock_init
390 *
391 * Description: Initialize the file lock group and the uipc and flist locks
392 *
393 * Parameters: (void)
394 *
395 * Returns: void
396 *
397 * Notes: Called at system startup from bsd_init().
398 */
399 void
400 file_lock_init(void)
401 {
402 /* allocate file lock group attribute and group */
403 file_lck_grp_attr = lck_grp_attr_alloc_init();
404
405 file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr);
406
407 /* Allocate file lock attribute */
408 file_lck_attr = lck_attr_alloc_init();
409 }
410
411
412 void
413 proc_dirs_lock_shared(proc_t p)
414 {
415 lck_rw_lock_shared(&p->p_dirs_lock);
416 }
417
418 void
419 proc_dirs_unlock_shared(proc_t p)
420 {
421 lck_rw_unlock_shared(&p->p_dirs_lock);
422 }
423
424 void
425 proc_dirs_lock_exclusive(proc_t p)
426 {
427 lck_rw_lock_exclusive(&p->p_dirs_lock);
428 }
429
430 void
431 proc_dirs_unlock_exclusive(proc_t p)
432 {
433 lck_rw_unlock_exclusive(&p->p_dirs_lock);
434 }
435
436 /*
437 * proc_fdlock, proc_fdlock_spin
438 *
439 * Description: Lock to control access to the per process struct fileproc
440 * and struct filedesc
441 *
442 * Parameters: p Process to take the lock on
443 *
444 * Returns: void
445 *
446 * Notes: The lock is initialized in forkproc() and destroyed in
447 * reap_child_process().
448 */
449 void
450 proc_fdlock(proc_t p)
451 {
452 lck_mtx_lock(&p->p_fdmlock);
453 }
454
455 void
456 proc_fdlock_spin(proc_t p)
457 {
458 lck_mtx_lock_spin(&p->p_fdmlock);
459 }
460
461 void
462 proc_fdlock_assert(proc_t p, int assertflags)
463 {
464 lck_mtx_assert(&p->p_fdmlock, assertflags);
465 }
466
467
468 /*
469 * proc_fdunlock
470 *
471 * Description: Unlock the lock previously locked by a call to proc_fdlock()
472 *
473 * Parameters: p Process to drop the lock on
474 *
475 * Returns: void
476 */
477 void
478 proc_fdunlock(proc_t p)
479 {
480 lck_mtx_unlock(&p->p_fdmlock);
481 }
482
483 struct fdt_iterator
484 fdt_next(proc_t p, int fd, bool only_settled)
485 {
486 struct fdt_iterator it;
487 struct filedesc *fdp = p->p_fd;
488 struct fileproc *fp;
489 int nfds = min(fdp->fd_lastfile + 1, fdp->fd_nfiles);
490
491 while (++fd < nfds) {
492 fp = fdp->fd_ofiles[fd];
493 if (fp == NULL || fp->fp_glob == NULL) {
494 continue;
495 }
496 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
497 continue;
498 }
499 it.fdti_fd = fd;
500 it.fdti_fp = fp;
501 return it;
502 }
503
504 it.fdti_fd = nfds;
505 it.fdti_fp = NULL;
506 return it;
507 }
508
509 struct fdt_iterator
510 fdt_prev(proc_t p, int fd, bool only_settled)
511 {
512 struct fdt_iterator it;
513 struct filedesc *fdp = p->p_fd;
514 struct fileproc *fp;
515
516 while (--fd >= 0) {
517 fp = fdp->fd_ofiles[fd];
518 if (fp == NULL || fp->fp_glob == NULL) {
519 continue;
520 }
521 if (only_settled && (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
522 continue;
523 }
524 it.fdti_fd = fd;
525 it.fdti_fp = fp;
526 return it;
527 }
528
529 it.fdti_fd = -1;
530 it.fdti_fp = NULL;
531 return it;
532 }
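
/*
 * Illustrative iteration sketch: fdt_next() is meant to be called with the
 * proc_fdlock held, starting from fd -1, until it reports a NULL fileproc.
 * The function name below is hypothetical.
 */
#if 0
static void
visit_open_files_example(proc_t p)
{
	struct fdt_iterator it;

	proc_fdlock(p);
	it = fdt_next(p, -1, true);
	while (it.fdti_fp != NULL) {
		/* inspect it.fdti_fd / it.fdti_fp here */
		it = fdt_next(p, it.fdti_fd, true);
	}
	proc_fdunlock(p);
}
#endif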
533
534 /*
535 * System calls on descriptors.
536 */
537
538
539 /*
540 * sys_getdtablesize
541 *
542 * Description: Returns the per process maximum size of the descriptor table
543 *
544 * Parameters: p Process being queried
545 * retval Pointer to the call return area
546 *
547 * Returns: 0 Success
548 *
549 * Implicit returns:
550 * *retval (modified) Size of dtable
551 */
552 int
553 sys_getdtablesize(proc_t p, __unused struct getdtablesize_args *uap, int32_t *retval)
554 {
555 *retval = (int32_t)MIN(proc_limitgetcur(p, RLIMIT_NOFILE, TRUE), maxfilesperproc);
556
557 return 0;
558 }
559
560
561 static void
562 procfdtbl_reservefd(struct proc * p, int fd)
563 {
564 p->p_fd->fd_ofiles[fd] = NULL;
565 p->p_fd->fd_ofileflags[fd] |= UF_RESERVED;
566 }
567
568 void
569 procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp)
570 {
571 if (fp != NULL) {
572 p->p_fd->fd_ofiles[fd] = fp;
573 }
574 p->p_fd->fd_ofileflags[fd] &= ~UF_RESERVED;
575 if ((p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT) == UF_RESVWAIT) {
576 p->p_fd->fd_ofileflags[fd] &= ~UF_RESVWAIT;
577 wakeup(&p->p_fd);
578 }
579 }
580
581 static void
582 procfdtbl_waitfd(struct proc * p, int fd)
583 {
584 p->p_fd->fd_ofileflags[fd] |= UF_RESVWAIT;
585 msleep(&p->p_fd, &p->p_fdmlock, PRIBIO, "ftbl_waitfd", NULL);
586 }
587
588 static void
589 procfdtbl_clearfd(struct proc * p, int fd)
590 {
591 int waiting;
592
593 waiting = (p->p_fd->fd_ofileflags[fd] & UF_RESVWAIT);
594 p->p_fd->fd_ofiles[fd] = NULL;
595 p->p_fd->fd_ofileflags[fd] = 0;
596 if (waiting == UF_RESVWAIT) {
597 wakeup(&p->p_fd);
598 }
599 }
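
/*
 * Illustrative note: the helpers above implement the UF_RESERVED slot
 * protocol. A typical allocation reserves a slot under the proc_fdlock,
 * publishes the fileproc, and wakes any thread parked in procfdtbl_waitfd().
 * The sketch below is hypothetical and assumes fdalloc() hands back a
 * reserved slot, as its callers later in this file expect.
 */
#if 0
static int
install_fp_example(proc_t p, struct fileproc *fp, int *fd_out)
{
	int fd, error;

	proc_fdlock(p);
	error = fdalloc(p, 0, &fd);              /* slot comes back reserved */
	if (error == 0) {
		procfdtbl_releasefd(p, fd, fp);  /* publish and clear UF_RESERVED */
		*fd_out = fd;
	}
	proc_fdunlock(p);
	return error;
}
#endif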
600
601 /*
602 * fdrelse
603 *
604 * Description: Inline utility function to free an fd in a filedesc
605 *
606 * Parameters: p Process owning the filedesc that
607 * the fd lies in
608 * fd fd to free
609 *
610 * Returns: void
611 *
612 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
613 * the caller
614 */
615 static void
616 fdrelse(struct proc * p, int fd)
617 {
618 struct filedesc *fdp = p->p_fd;
619 int nfd = 0;
620
621 if (fd < fdp->fd_freefile) {
622 fdp->fd_freefile = fd;
623 }
624 #if DIAGNOSTIC
625 if (fd > fdp->fd_lastfile) {
626 panic("fdrelse: fd_lastfile inconsistent");
627 }
628 #endif
629 procfdtbl_clearfd(p, fd);
630
631 while ((nfd = fdp->fd_lastfile) > 0 &&
632 fdp->fd_ofiles[nfd] == NULL &&
633 !(fdp->fd_ofileflags[nfd] & UF_RESERVED)) {
634 /* JMM - What about files with lingering EV_VANISHED knotes? */
635 fdp->fd_lastfile--;
636 }
637 }
638
639
640 int
641 fd_rdwr(
642 int fd,
643 enum uio_rw rw,
644 uint64_t base,
645 int64_t len,
646 enum uio_seg segflg,
647 off_t offset,
648 int io_flg,
649 int64_t *aresid)
650 {
651 struct fileproc *fp;
652 proc_t p;
653 int error = 0;
654 int flags = 0;
655 int spacetype;
656 uio_t auio = NULL;
657 char uio_buf[UIO_SIZEOF(1)];
658 struct vfs_context context = *(vfs_context_current());
659
660 p = current_proc();
661
662 error = fp_lookup(p, fd, &fp, 0);
663 if (error) {
664 return error;
665 }
666
667 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
668 case DTYPE_VNODE:
669 case DTYPE_PIPE:
670 case DTYPE_SOCKET:
671 break;
672 default:
673 error = EINVAL;
674 goto out;
675 }
676 if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) {
677 error = EBADF;
678 goto out;
679 }
680
681 if (rw == UIO_READ && !(fp->f_flag & FREAD)) {
682 error = EBADF;
683 goto out;
684 }
685
686 context.vc_ucred = fp->fp_glob->fg_cred;
687
688 if (UIO_SEG_IS_USER_SPACE(segflg)) {
689 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
690 } else {
691 spacetype = UIO_SYSSPACE;
692 }
693
694 auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf));
695
696 uio_addiov(auio, (user_addr_t)base, (user_size_t)len);
697
698 if (!(io_flg & IO_APPEND)) {
699 flags = FOF_OFFSET;
700 }
701
702 if (rw == UIO_WRITE) {
703 user_ssize_t orig_resid = uio_resid(auio);
704 error = fo_write(fp, auio, flags, &context);
705 if (uio_resid(auio) < orig_resid) {
706 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
707 }
708 } else {
709 error = fo_read(fp, auio, flags, &context);
710 }
711
712 if (aresid) {
713 *aresid = uio_resid(auio);
714 } else if (uio_resid(auio) && error == 0) {
715 error = EIO;
716 }
717 out:
718 fp_drop(p, fd, fp, 0);
719 return error;
720 }
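
/*
 * Illustrative sketch only: fd_rdwr() gives in-kernel callers uio-style I/O
 * against an already-open descriptor. The example reads into a
 * caller-supplied kernel buffer at an explicit offset; the wrapper name is
 * hypothetical.
 */
#if 0
static int
read_at_offset_example(int fd, void *buf, int64_t len, off_t off,
    int64_t *resid)
{
	return fd_rdwr(fd, UIO_READ, (uint64_t)(uintptr_t)buf, len,
	    UIO_SYSSPACE, off, 0, resid);
}
#endif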
721
722
723
724 /*
725 * sys_dup
726 *
727 * Description: Duplicate a file descriptor.
728 *
729 * Parameters: p Process performing the dup
730 * uap->fd The fd to dup
731 * retval Pointer to the call return area
732 *
733 * Returns: 0 Success
734 * !0 Errno
735 *
736 * Implicit returns:
737 * *retval (modified) The new descriptor
738 */
739 int
740 sys_dup(proc_t p, struct dup_args *uap, int32_t *retval)
741 {
742 struct filedesc *fdp = p->p_fd;
743 int old = uap->fd;
744 int new, error;
745 struct fileproc *fp;
746
747 proc_fdlock(p);
748 if ((error = fp_lookup(p, old, &fp, 1))) {
749 proc_fdunlock(p);
750 return error;
751 }
752 if (FP_ISGUARDED(fp, GUARD_DUP)) {
753 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
754 (void) fp_drop(p, old, fp, 1);
755 proc_fdunlock(p);
756 return error;
757 }
758 if ((error = fdalloc(p, 0, &new))) {
759 fp_drop(p, old, fp, 1);
760 proc_fdunlock(p);
761 return error;
762 }
763 error = finishdup(p, fdp, old, new, 0, retval);
764 fp_drop(p, old, fp, 1);
765 proc_fdunlock(p);
766
767 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fp->fp_glob) == DTYPE_SOCKET) {
768 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_START,
769 new, 0, (int64_t)VM_KERNEL_ADDRPERM(fp->f_data));
770 }
771
772 return error;
773 }
774
775 /*
776 * sys_dup2
777 *
778 * Description: Duplicate a file descriptor to a particular value.
779 *
780 * Parameters: p Process performing the dup
781 * uap->from The fd to dup
782 * uap->to The fd to dup it to
783 * retval Pointer to the call return area
784 *
785 * Returns: 0 Success
786 * !0 Errno
787 *
788 * Implicit returns:
789 * *retval (modified) The new descriptor
790 */
791 int
792 sys_dup2(proc_t p, struct dup2_args *uap, int32_t *retval)
793 {
794 return dup2(p, uap->from, uap->to, retval);
795 }
796
797 int
798 dup2(proc_t p, int old, int new, int *retval)
799 {
800 struct filedesc *fdp = p->p_fd;
801 struct fileproc *fp, *nfp;
802 int i, error;
803 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
804
805 proc_fdlock(p);
806
807 startover:
808 if ((error = fp_lookup(p, old, &fp, 1))) {
809 proc_fdunlock(p);
810 return error;
811 }
812 if (FP_ISGUARDED(fp, GUARD_DUP)) {
813 error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP);
814 (void) fp_drop(p, old, fp, 1);
815 proc_fdunlock(p);
816 return error;
817 }
818 if (new < 0 ||
819 (rlim_t)new >= nofile ||
820 new >= maxfilesperproc) {
821 fp_drop(p, old, fp, 1);
822 proc_fdunlock(p);
823 return EBADF;
824 }
825 if (old == new) {
826 fp_drop(p, old, fp, 1);
827 *retval = new;
828 proc_fdunlock(p);
829 return 0;
830 }
831 if (new < 0 || new >= fdp->fd_nfiles) {
832 if ((error = fdalloc(p, new, &i))) {
833 fp_drop(p, old, fp, 1);
834 proc_fdunlock(p);
835 return error;
836 }
837 if (new != i) {
838 fdrelse(p, i);
839 goto closeit;
840 }
841 } else {
842 closeit:
843 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == UF_RESERVED) {
844 fp_drop(p, old, fp, 1);
845 procfdtbl_waitfd(p, new);
846 #if DIAGNOSTIC
847 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
848 #endif
849 goto startover;
850 }
851
852 if ((nfp = fdp->fd_ofiles[new]) != NULL) {
853 if (FP_ISGUARDED(nfp, GUARD_CLOSE)) {
854 fp_drop(p, old, fp, 1);
855 error = fp_guard_exception(p,
856 new, nfp, kGUARD_EXC_CLOSE);
857 proc_fdunlock(p);
858 return error;
859 }
860 (void)fp_close_and_unlock(p, new, nfp, FD_DUP2RESV);
861 proc_fdlock(p);
862 assert(fdp->fd_ofileflags[new] & UF_RESERVED);
863 } else {
864 #if DIAGNOSTIC
865 if (fdp->fd_ofiles[new] != NULL) {
866 panic("dup2: no ref on fileproc %d", new);
867 }
868 #endif
869 procfdtbl_reservefd(p, new);
870 }
871 }
872 #if DIAGNOSTIC
873 if (fdp->fd_ofiles[new] != 0) {
874 panic("dup2: overwriting fd_ofiles with new %d", new);
875 }
876 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
877 panic("dup2: unreserved fileflags with new %d", new);
878 }
879 #endif
880 error = finishdup(p, fdp, old, new, 0, retval);
881 fp_drop(p, old, fp, 1);
882 proc_fdunlock(p);
883
884 return error;
885 }
886
887
888 /*
889 * fcntl
890 *
891 * Description: The file control system call.
892 *
893 * Parameters: p Process performing the fcntl
894 * uap->fd The fd to operate against
895 * uap->cmd The command to perform
896 * uap->arg Pointer to the command argument
897 * retval Pointer to the call return area
898 *
899 * Returns: 0 Success
900 * !0 Errno (see fcntl_nocancel)
901 *
902 * Implicit returns:
903 * *retval (modified) fcntl return value (if any)
904 *
905 * Notes: This system call differs from fcntl_nocancel() in that it
906 * tests for cancellation prior to performing a potentially
907 * blocking operation.
908 */
909 int
910 sys_fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval)
911 {
912 __pthread_testcancel(1);
913 return sys_fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval);
914 }
915
916 #define ACCOUNT_OPENFROM_ENTITLEMENT \
917 "com.apple.private.vfs.role-account-openfrom"
918
919 /*
920 * sys_fcntl_nocancel
921 *
922 * Description: A non-cancel-testing file control system call.
923 *
924 * Parameters: p Process performing the fcntl
925 * uap->fd The fd to operate against
926 * uap->cmd The command to perform
927 * uap->arg Pointer to the command argument
928 * retval Pointer to the call return area
929 *
930 * Returns: 0 Success
931 * EINVAL
932 * fp_lookup:EBADF Bad file descriptor
933 * [F_DUPFD]
934 * fdalloc:EMFILE
935 * fdalloc:ENOMEM
936 * finishdup:EBADF
937 * finishdup:ENOMEM
938 * [F_SETOWN]
939 * ESRCH
940 * [F_SETLK]
941 * EBADF
942 * EOVERFLOW
943 * copyin:EFAULT
944 * vnode_getwithref:???
945 * VNOP_ADVLOCK:???
946 * msleep:ETIMEDOUT
947 * [F_GETLK]
948 * EBADF
949 * EOVERFLOW
950 * copyin:EFAULT
951 * copyout:EFAULT
952 * vnode_getwithref:???
953 * VNOP_ADVLOCK:???
954 * [F_PREALLOCATE]
955 * EBADF
956 * EINVAL
957 * copyin:EFAULT
958 * copyout:EFAULT
959 * vnode_getwithref:???
960 * VNOP_ALLOCATE:???
961 * [F_SETSIZE,F_RDADVISE]
962 * EBADF
963 * EINVAL
964 * copyin:EFAULT
965 * vnode_getwithref:???
966 * [F_RDAHEAD,F_NOCACHE]
967 * EBADF
968 * vnode_getwithref:???
969 * [???]
970 *
971 * Implicit returns:
972 * *retval (modified) fcntl return value (if any)
973 */
974 int
975 sys_fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
976 {
977 int fd = uap->fd;
978 struct filedesc *fdp = p->p_fd;
979 struct fileproc *fp;
980 char *pop;
981 struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */
982 unsigned int oflags, nflags;
983 int i, tmp, error, error2, flg = 0;
984 struct flock fl = {};
985 struct flocktimeout fltimeout;
986 struct timespec *timeout = NULL;
987 struct vfs_context context;
988 off_t offset;
989 int newmin;
990 daddr64_t lbn, bn;
991 unsigned int fflag;
992 user_addr_t argp;
993 boolean_t is64bit;
994 rlim_t nofile;
995 int has_entitlement = 0;
996
997 AUDIT_ARG(fd, uap->fd);
998 AUDIT_ARG(cmd, uap->cmd);
999
1000 nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
1001
1002 proc_fdlock(p);
1003 if ((error = fp_lookup(p, fd, &fp, 1))) {
1004 proc_fdunlock(p);
1005 return error;
1006 }
1007 context.vc_thread = current_thread();
1008 context.vc_ucred = fp->f_cred;
1009
1010 is64bit = proc_is64bit(p);
1011 if (is64bit) {
1012 argp = uap->arg;
1013 } else {
1014 /*
1015 * Since the arg parameter is defined as a long but may be
1016 * either a long or a pointer we must take care to handle
1017 * sign extension issues. Our sys call munger will sign
1018 * extend a long when we are called from a 32-bit process.
1019 * Since we can never have an address greater than 32-bits
1020 * from a 32-bit process we lop off the top 32-bits to avoid
1021 * getting the wrong address
1022 */
1023 argp = CAST_USER_ADDR_T((uint32_t)uap->arg);
1024 }
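/*
 * Worked example for the cast above: a 32-bit caller passing the pointer
 * 0xFFFF1000 arrives here sign-extended to 0xFFFFFFFFFFFF1000; the
 * (uint32_t) cast truncates it back to 0xFFFF1000 before it is re-widened
 * into a user_addr_t.
 */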
1025
1026 #if CONFIG_MACF
1027 error = mac_file_check_fcntl(proc_ucred(p), fp->fp_glob, uap->cmd,
1028 uap->arg);
1029 if (error) {
1030 goto out;
1031 }
1032 #endif
1033
1034 pop = &fdp->fd_ofileflags[fd];
1035
1036 switch (uap->cmd) {
1037 case F_DUPFD:
1038 case F_DUPFD_CLOEXEC:
1039 if (FP_ISGUARDED(fp, GUARD_DUP)) {
1040 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP);
1041 goto out;
1042 }
1043 newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1044 AUDIT_ARG(value32, newmin);
1045 if ((rlim_t)newmin >= nofile ||
1046 newmin >= maxfilesperproc) {
1047 error = EINVAL;
1048 goto out;
1049 }
1050 if ((error = fdalloc(p, newmin, &i))) {
1051 goto out;
1052 }
1053 error = finishdup(p, fdp, fd, i,
1054 uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
1055 goto out;
1056
1057 case F_GETFD:
1058 *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0;
1059 error = 0;
1060 goto out;
1061
1062 case F_SETFD:
1063 AUDIT_ARG(value32, (uint32_t)uap->arg);
1064 if (uap->arg & FD_CLOEXEC) {
1065 *pop |= UF_EXCLOSE;
1066 } else {
1067 if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) {
1068 error = fp_guard_exception(p,
1069 fd, fp, kGUARD_EXC_NOCLOEXEC);
1070 goto out;
1071 }
1072 *pop &= ~UF_EXCLOSE;
1073 }
1074 error = 0;
1075 goto out;
1076
1077 case F_GETFL:
1078 *retval = OFLAGS(fp->f_flag);
1079 error = 0;
1080 goto out;
1081
1082 case F_SETFL:
1083 // FIXME (rdar://54898652)
1084 //
1085 // this code is broken if fcntl(F_SETFL), ioctl() are
1086 // called concurrently for the same fileglob.
1087
1088 tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
1089 AUDIT_ARG(value32, tmp);
1090
1091 os_atomic_rmw_loop(&fp->f_flag, oflags, nflags, relaxed, {
1092 nflags = oflags & ~FCNTLFLAGS;
1093 nflags |= FFLAGS(tmp) & FCNTLFLAGS;
1094 });
1095 tmp = nflags & FNONBLOCK;
1096 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1097 if (error) {
1098 goto out;
1099 }
1100 tmp = nflags & FASYNC;
1101 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1102 if (!error) {
1103 goto out;
1104 }
1105 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1106 tmp = 0;
1107 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1108 goto out;
1109
1110 case F_GETOWN:
1111 if (fp->f_type == DTYPE_SOCKET) {
1112 *retval = ((struct socket *)fp->f_data)->so_pgid;
1113 error = 0;
1114 goto out;
1115 }
1116 error = fo_ioctl(fp, TIOCGPGRP, (caddr_t)retval, &context);
1117 *retval = -*retval;
1118 goto out;
1119
1120 case F_SETOWN:
1121 tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */
1122 AUDIT_ARG(value32, tmp);
1123 if (fp->f_type == DTYPE_SOCKET) {
1124 ((struct socket *)fp->f_data)->so_pgid = tmp;
1125 error = 0;
1126 goto out;
1127 }
1128 if (fp->f_type == DTYPE_PIPE) {
1129 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1130 goto out;
1131 }
1132
1133 if (tmp <= 0) {
1134 tmp = -tmp;
1135 } else {
1136 proc_t p1 = proc_find(tmp);
1137 if (p1 == 0) {
1138 error = ESRCH;
1139 goto out;
1140 }
1141 tmp = (int)p1->p_pgrpid;
1142 proc_rele(p1);
1143 }
1144 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1145 goto out;
1146
1147 case F_SETNOSIGPIPE:
1148 tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
1149 if (fp->f_type == DTYPE_SOCKET) {
1150 #if SOCKETS
1151 error = sock_setsockopt((struct socket *)fp->f_data,
1152 SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof(tmp));
1153 #else
1154 error = EINVAL;
1155 #endif
1156 } else {
1157 struct fileglob *fg = fp->fp_glob;
1158
1159 lck_mtx_lock_spin(&fg->fg_lock);
1160 if (tmp) {
1161 fg->fg_lflags |= FG_NOSIGPIPE;
1162 } else {
1163 fg->fg_lflags &= ~FG_NOSIGPIPE;
1164 }
1165 lck_mtx_unlock(&fg->fg_lock);
1166 error = 0;
1167 }
1168 goto out;
1169
1170 case F_GETNOSIGPIPE:
1171 if (fp->f_type == DTYPE_SOCKET) {
1172 #if SOCKETS
1173 int retsize = sizeof(*retval);
1174 error = sock_getsockopt((struct socket *)fp->f_data,
1175 SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
1176 #else
1177 error = EINVAL;
1178 #endif
1179 } else {
1180 *retval = (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) ?
1181 1 : 0;
1182 error = 0;
1183 }
1184 goto out;
1185
1186 case F_SETCONFINED:
1187 /*
1188 * If this is the only reference to this fglob in the process
1189 * and it's already marked as close-on-fork then mark it as
1190 * (immutably) "confined" i.e. any fd that points to it will
1191 * forever be close-on-fork, and attempts to use an IPC
1192 * mechanism to move the descriptor elsewhere will fail.
1193 */
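/*
 * Illustrative example: a descriptor opened with O_CLOFORK (so UF_FORKCLOSE
 * is set) and not yet dup'd can be confined with fcntl(fd, F_SETCONFINED, 1);
 * a second reference to the fileglob yields EAGAIN below, and any later
 * attempt to clear the flag yields EPERM.
 */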
1194 if (CAST_DOWN_EXPLICIT(int, uap->arg)) {
1195 struct fileglob *fg = fp->fp_glob;
1196
1197 lck_mtx_lock_spin(&fg->fg_lock);
1198 if (fg->fg_lflags & FG_CONFINED) {
1199 error = 0;
1200 } else if (1 != os_ref_get_count_raw(&fg->fg_count)) {
1201 error = EAGAIN; /* go close the dup .. */
1202 } else if (UF_FORKCLOSE == (*pop & UF_FORKCLOSE)) {
1203 fg->fg_lflags |= FG_CONFINED;
1204 error = 0;
1205 } else {
1206 error = EBADF; /* open without O_CLOFORK? */
1207 }
1208 lck_mtx_unlock(&fg->fg_lock);
1209 } else {
1210 /*
1211 * Other subsystems may have built on the immutability
1212 * of FG_CONFINED; clearing it may be tricky.
1213 */
1214 error = EPERM; /* immutable */
1215 }
1216 goto out;
1217
1218 case F_GETCONFINED:
1219 *retval = (fp->fp_glob->fg_lflags & FG_CONFINED) ? 1 : 0;
1220 error = 0;
1221 goto out;
1222
1223 case F_SETLKWTIMEOUT:
1224 case F_SETLKW:
1225 case F_OFD_SETLKWTIMEOUT:
1226 case F_OFD_SETLKW:
1227 flg |= F_WAIT;
1228 OS_FALLTHROUGH;
1229
1230 case F_SETLK:
1231 case F_OFD_SETLK:
1232 if (fp->f_type != DTYPE_VNODE) {
1233 error = EBADF;
1234 goto out;
1235 }
1236 vp = (struct vnode *)fp->f_data;
1237
1238 fflag = fp->f_flag;
1239 offset = fp->f_offset;
1240 proc_fdunlock(p);
1241
1242 /* Copy in the lock structure */
1243 if (F_SETLKWTIMEOUT == uap->cmd ||
1244 F_OFD_SETLKWTIMEOUT == uap->cmd) {
1245 error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout));
1246 if (error) {
1247 goto outdrop;
1248 }
1249 fl = fltimeout.fl;
1250 timeout = &fltimeout.timeout;
1251 } else {
1252 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1253 if (error) {
1254 goto outdrop;
1255 }
1256 }
1257
1258 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1259 /* and ending byte for EOVERFLOW in SEEK_SET */
1260 error = check_file_seek_range(&fl, offset);
1261 if (error) {
1262 goto outdrop;
1263 }
1264
1265 if ((error = vnode_getwithref(vp))) {
1266 goto outdrop;
1267 }
1268 if (fl.l_whence == SEEK_CUR) {
1269 fl.l_start += offset;
1270 }
1271
1272 #if CONFIG_MACF
1273 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1274 F_SETLK, &fl);
1275 if (error) {
1276 (void)vnode_put(vp);
1277 goto outdrop;
1278 }
1279 #endif
1280 switch (uap->cmd) {
1281 case F_OFD_SETLK:
1282 case F_OFD_SETLKW:
1283 case F_OFD_SETLKWTIMEOUT:
1284 flg |= F_OFD_LOCK;
1285 switch (fl.l_type) {
1286 case F_RDLCK:
1287 if ((fflag & FREAD) == 0) {
1288 error = EBADF;
1289 break;
1290 }
1291 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1292 F_SETLK, &fl, flg, &context, timeout);
1293 break;
1294 case F_WRLCK:
1295 if ((fflag & FWRITE) == 0) {
1296 error = EBADF;
1297 break;
1298 }
1299 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1300 F_SETLK, &fl, flg, &context, timeout);
1301 break;
1302 case F_UNLCK:
1303 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1304 F_UNLCK, &fl, F_OFD_LOCK, &context,
1305 timeout);
1306 break;
1307 default:
1308 error = EINVAL;
1309 break;
1310 }
1311 if (0 == error &&
1312 (F_RDLCK == fl.l_type || F_WRLCK == fl.l_type)) {
1313 struct fileglob *fg = fp->fp_glob;
1314
1315 /*
1316 * arrange F_UNLCK on last close (once
1317 * set, FG_HAS_OFDLOCK is immutable)
1318 */
1319 if ((fg->fg_lflags & FG_HAS_OFDLOCK) == 0) {
1320 lck_mtx_lock_spin(&fg->fg_lock);
1321 fg->fg_lflags |= FG_HAS_OFDLOCK;
1322 lck_mtx_unlock(&fg->fg_lock);
1323 }
1324 }
1325 break;
1326 default:
1327 flg |= F_POSIX;
1328 switch (fl.l_type) {
1329 case F_RDLCK:
1330 if ((fflag & FREAD) == 0) {
1331 error = EBADF;
1332 break;
1333 }
1334 // XXX UInt32 unsafe for LP64 kernel
1335 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1336 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1337 F_SETLK, &fl, flg, &context, timeout);
1338 break;
1339 case F_WRLCK:
1340 if ((fflag & FWRITE) == 0) {
1341 error = EBADF;
1342 break;
1343 }
1344 // XXX UInt32 unsafe for LP64 kernel
1345 os_atomic_or(&p->p_ladvflag, P_LADVLOCK, relaxed);
1346 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1347 F_SETLK, &fl, flg, &context, timeout);
1348 break;
1349 case F_UNLCK:
1350 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1351 F_UNLCK, &fl, F_POSIX, &context, timeout);
1352 break;
1353 default:
1354 error = EINVAL;
1355 break;
1356 }
1357 break;
1358 }
1359 (void) vnode_put(vp);
1360 goto outdrop;
1361
1362 case F_GETLK:
1363 case F_OFD_GETLK:
1364 case F_GETLKPID:
1365 case F_OFD_GETLKPID:
1366 if (fp->f_type != DTYPE_VNODE) {
1367 error = EBADF;
1368 goto out;
1369 }
1370 vp = (struct vnode *)fp->f_data;
1371
1372 offset = fp->f_offset;
1373 proc_fdunlock(p);
1374
1375 /* Copy in the lock structure */
1376 error = copyin(argp, (caddr_t)&fl, sizeof(fl));
1377 if (error) {
1378 goto outdrop;
1379 }
1380
1381 /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */
1382 /* and ending byte for EOVERFLOW in SEEK_SET */
1383 error = check_file_seek_range(&fl, offset);
1384 if (error) {
1385 goto outdrop;
1386 }
1387
1388 if ((fl.l_whence == SEEK_SET) && (fl.l_start < 0)) {
1389 error = EINVAL;
1390 goto outdrop;
1391 }
1392
1393 switch (fl.l_type) {
1394 case F_RDLCK:
1395 case F_UNLCK:
1396 case F_WRLCK:
1397 break;
1398 default:
1399 error = EINVAL;
1400 goto outdrop;
1401 }
1402
1403 switch (fl.l_whence) {
1404 case SEEK_CUR:
1405 case SEEK_SET:
1406 case SEEK_END:
1407 break;
1408 default:
1409 error = EINVAL;
1410 goto outdrop;
1411 }
1412
1413 if ((error = vnode_getwithref(vp)) == 0) {
1414 if (fl.l_whence == SEEK_CUR) {
1415 fl.l_start += offset;
1416 }
1417
1418 #if CONFIG_MACF
1419 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob,
1420 uap->cmd, &fl);
1421 if (error == 0)
1422 #endif
1423 switch (uap->cmd) {
1424 case F_OFD_GETLK:
1425 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1426 F_GETLK, &fl, F_OFD_LOCK, &context, NULL);
1427 break;
1428 case F_OFD_GETLKPID:
1429 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob,
1430 F_GETLKPID, &fl, F_OFD_LOCK, &context, NULL);
1431 break;
1432 default:
1433 error = VNOP_ADVLOCK(vp, (caddr_t)p,
1434 uap->cmd, &fl, F_POSIX, &context, NULL);
1435 break;
1436 }
1437
1438 (void)vnode_put(vp);
1439
1440 if (error == 0) {
1441 error = copyout((caddr_t)&fl, argp, sizeof(fl));
1442 }
1443 }
1444 goto outdrop;
1445
1446 case F_PREALLOCATE: {
1447 fstore_t alloc_struct; /* structure for allocate command */
1448 u_int32_t alloc_flags = 0;
1449
1450 if (fp->f_type != DTYPE_VNODE) {
1451 error = EBADF;
1452 goto out;
1453 }
1454
1455 vp = (struct vnode *)fp->f_data;
1456 proc_fdunlock(p);
1457
1458 /* make sure that we have write permission */
1459 if ((fp->f_flag & FWRITE) == 0) {
1460 error = EBADF;
1461 goto outdrop;
1462 }
1463
1464 error = copyin(argp, (caddr_t)&alloc_struct, sizeof(alloc_struct));
1465 if (error) {
1466 goto outdrop;
1467 }
1468
1469 /* now set the space allocated to 0 */
1470 alloc_struct.fst_bytesalloc = 0;
1471
1472 /*
1473 * Do some simple parameter checking
1474 */
1475
1476 /* set up the flags */
1477
1478 alloc_flags |= PREALLOCATE;
1479
1480 if (alloc_struct.fst_flags & F_ALLOCATECONTIG) {
1481 alloc_flags |= ALLOCATECONTIG;
1482 }
1483
1484 if (alloc_struct.fst_flags & F_ALLOCATEALL) {
1485 alloc_flags |= ALLOCATEALL;
1486 }
1487
1488 /*
1489 * Do any position mode specific stuff. The only
1490 * position mode supported now is PEOFPOSMODE
1491 */
1492
1493 switch (alloc_struct.fst_posmode) {
1494 case F_PEOFPOSMODE:
1495 if (alloc_struct.fst_offset != 0) {
1496 error = EINVAL;
1497 goto outdrop;
1498 }
1499
1500 alloc_flags |= ALLOCATEFROMPEOF;
1501 break;
1502
1503 case F_VOLPOSMODE:
1504 if (alloc_struct.fst_offset <= 0) {
1505 error = EINVAL;
1506 goto outdrop;
1507 }
1508
1509 alloc_flags |= ALLOCATEFROMVOL;
1510 break;
1511
1512 default: {
1513 error = EINVAL;
1514 goto outdrop;
1515 }
1516 }
1517 if ((error = vnode_getwithref(vp)) == 0) {
1518 /*
1519 * call allocate to get the space
1520 */
1521 error = VNOP_ALLOCATE(vp, alloc_struct.fst_length, alloc_flags,
1522 &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset,
1523 &context);
1524 (void)vnode_put(vp);
1525
1526 error2 = copyout((caddr_t)&alloc_struct, argp, sizeof(alloc_struct));
1527
1528 if (error == 0) {
1529 error = error2;
1530 }
1531 }
1532 goto outdrop;
1533 }
1534 case F_PUNCHHOLE: {
1535 fpunchhole_t args;
1536
1537 if (fp->f_type != DTYPE_VNODE) {
1538 error = EBADF;
1539 goto out;
1540 }
1541
1542 vp = (struct vnode *)fp->f_data;
1543 proc_fdunlock(p);
1544
1545 /* need write permissions */
1546 if ((fp->f_flag & FWRITE) == 0) {
1547 error = EPERM;
1548 goto outdrop;
1549 }
1550
1551 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1552 goto outdrop;
1553 }
1554
1555 if ((error = vnode_getwithref(vp))) {
1556 goto outdrop;
1557 }
1558
1559 #if CONFIG_MACF
1560 if ((error = mac_vnode_check_write(&context, fp->fp_glob->fg_cred, vp))) {
1561 (void)vnode_put(vp);
1562 goto outdrop;
1563 }
1564 #endif
1565
1566 error = VNOP_IOCTL(vp, F_PUNCHHOLE, (caddr_t)&args, 0, &context);
1567 (void)vnode_put(vp);
1568
1569 goto outdrop;
1570 }
1571 case F_TRIM_ACTIVE_FILE: {
1572 ftrimactivefile_t args;
1573
1574 if (priv_check_cred(kauth_cred_get(), PRIV_TRIM_ACTIVE_FILE, 0)) {
1575 error = EACCES;
1576 goto out;
1577 }
1578
1579 if (fp->f_type != DTYPE_VNODE) {
1580 error = EBADF;
1581 goto out;
1582 }
1583
1584 vp = (struct vnode *)fp->f_data;
1585 proc_fdunlock(p);
1586
1587 /* need write permissions */
1588 if ((fp->f_flag & FWRITE) == 0) {
1589 error = EPERM;
1590 goto outdrop;
1591 }
1592
1593 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1594 goto outdrop;
1595 }
1596
1597 if ((error = vnode_getwithref(vp))) {
1598 goto outdrop;
1599 }
1600
1601 error = VNOP_IOCTL(vp, F_TRIM_ACTIVE_FILE, (caddr_t)&args, 0, &context);
1602 (void)vnode_put(vp);
1603
1604 goto outdrop;
1605 }
1606 case F_SPECULATIVE_READ: {
1607 fspecread_t args;
1608
1609 if (fp->f_type != DTYPE_VNODE) {
1610 error = EBADF;
1611 goto out;
1612 }
1613
1614 vp = (struct vnode *)fp->f_data;
1615 proc_fdunlock(p);
1616
1617 if ((error = copyin(argp, (caddr_t)&args, sizeof(args)))) {
1618 goto outdrop;
1619 }
1620
1621 /* Discard invalid offsets or lengths */
1622 if ((args.fsr_offset < 0) || (args.fsr_length < 0)) {
1623 error = EINVAL;
1624 goto outdrop;
1625 }
1626
1627 /*
1628 * Round the file offset down to a page-size boundary (or to 0).
1629 * The filesystem will need to round the length up to the end of the page boundary
1630 * or to the EOF of the file.
1631 */
1632 uint64_t foff = (((uint64_t)args.fsr_offset) & ~((uint64_t)PAGE_MASK));
1633 uint64_t foff_delta = args.fsr_offset - foff;
1634 args.fsr_offset = (off_t) foff;
1635
1636 /*
1637 * Now add in the delta to the supplied length. Since we may have adjusted the
1638 * offset, increase it by the amount that we adjusted.
1639 */
1640 args.fsr_length += foff_delta;
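/*
 * Worked example (assuming 4 KiB pages): a request with fsr_offset = 0x1234
 * and fsr_length = 0x100 is rewritten above to fsr_offset = 0x1000 and
 * fsr_length = 0x334, so the original byte range stays covered after the
 * page alignment.
 */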
1641
1642 if ((error = vnode_getwithref(vp))) {
1643 goto outdrop;
1644 }
1645 error = VNOP_IOCTL(vp, F_SPECULATIVE_READ, (caddr_t)&args, 0, &context);
1646 (void)vnode_put(vp);
1647
1648 goto outdrop;
1649 }
1650 case F_SETSIZE:
1651 if (fp->f_type != DTYPE_VNODE) {
1652 error = EBADF;
1653 goto out;
1654 }
1655 vp = (struct vnode *)fp->f_data;
1656 proc_fdunlock(p);
1657
1658 error = copyin(argp, (caddr_t)&offset, sizeof(off_t));
1659 if (error) {
1660 goto outdrop;
1661 }
1662 AUDIT_ARG(value64, offset);
1663
1664 error = vnode_getwithref(vp);
1665 if (error) {
1666 goto outdrop;
1667 }
1668
1669 #if CONFIG_MACF
1670 error = mac_vnode_check_truncate(&context,
1671 fp->fp_glob->fg_cred, vp);
1672 if (error) {
1673 (void)vnode_put(vp);
1674 goto outdrop;
1675 }
1676 #endif
1677 /*
1678 * Make sure that we are root. Growing a file
1679 * without zero filling the data is a security hole.
1680 */
1681 if (!kauth_cred_issuser(kauth_cred_get())) {
1682 error = EACCES;
1683 } else {
1684 /*
1685 * Require privilege to change file size without zerofill,
1686 * else will change the file size and zerofill it.
1687 */
1688 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_SETSIZE, 0);
1689 if (error == 0) {
1690 error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context);
1691 } else {
1692 error = vnode_setsize(vp, offset, 0, &context);
1693 }
1694
1695 #if CONFIG_MACF
1696 if (error == 0) {
1697 mac_vnode_notify_truncate(&context, fp->fp_glob->fg_cred, vp);
1698 }
1699 #endif
1700 }
1701
1702 (void)vnode_put(vp);
1703 goto outdrop;
1704
1705 case F_RDAHEAD:
1706 if (fp->f_type != DTYPE_VNODE) {
1707 error = EBADF;
1708 goto out;
1709 }
1710 if (uap->arg) {
1711 os_atomic_andnot(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1712 } else {
1713 os_atomic_or(&fp->fp_glob->fg_flag, FNORDAHEAD, relaxed);
1714 }
1715 goto out;
1716
1717 case F_NOCACHE:
1718 if (fp->f_type != DTYPE_VNODE) {
1719 error = EBADF;
1720 goto out;
1721 }
1722 if (uap->arg) {
1723 os_atomic_or(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1724 } else {
1725 os_atomic_andnot(&fp->fp_glob->fg_flag, FNOCACHE, relaxed);
1726 }
1727 goto out;
1728
1729 case F_NODIRECT:
1730 if (fp->f_type != DTYPE_VNODE) {
1731 error = EBADF;
1732 goto out;
1733 }
1734 if (uap->arg) {
1735 os_atomic_or(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1736 } else {
1737 os_atomic_andnot(&fp->fp_glob->fg_flag, FNODIRECT, relaxed);
1738 }
1739 goto out;
1740
1741 case F_SINGLE_WRITER:
1742 if (fp->f_type != DTYPE_VNODE) {
1743 error = EBADF;
1744 goto out;
1745 }
1746 if (uap->arg) {
1747 os_atomic_or(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1748 } else {
1749 os_atomic_andnot(&fp->fp_glob->fg_flag, FSINGLE_WRITER, relaxed);
1750 }
1751 goto out;
1752
1753 case F_GLOBAL_NOCACHE:
1754 if (fp->f_type != DTYPE_VNODE) {
1755 error = EBADF;
1756 goto out;
1757 }
1758 vp = (struct vnode *)fp->f_data;
1759 proc_fdunlock(p);
1760
1761 if ((error = vnode_getwithref(vp)) == 0) {
1762 *retval = vnode_isnocache(vp);
1763
1764 if (uap->arg) {
1765 vnode_setnocache(vp);
1766 } else {
1767 vnode_clearnocache(vp);
1768 }
1769
1770 (void)vnode_put(vp);
1771 }
1772 goto outdrop;
1773
1774 case F_CHECK_OPENEVT:
1775 if (fp->f_type != DTYPE_VNODE) {
1776 error = EBADF;
1777 goto out;
1778 }
1779 vp = (struct vnode *)fp->f_data;
1780 proc_fdunlock(p);
1781
1782 if ((error = vnode_getwithref(vp)) == 0) {
1783 *retval = vnode_is_openevt(vp);
1784
1785 if (uap->arg) {
1786 vnode_set_openevt(vp);
1787 } else {
1788 vnode_clear_openevt(vp);
1789 }
1790
1791 (void)vnode_put(vp);
1792 }
1793 goto outdrop;
1794
1795 case F_RDADVISE: {
1796 struct radvisory ra_struct;
1797
1798 if (fp->f_type != DTYPE_VNODE) {
1799 error = EBADF;
1800 goto out;
1801 }
1802 vp = (struct vnode *)fp->f_data;
1803 proc_fdunlock(p);
1804
1805 if ((error = copyin(argp, (caddr_t)&ra_struct, sizeof(ra_struct)))) {
1806 goto outdrop;
1807 }
1808 if (ra_struct.ra_offset < 0 || ra_struct.ra_count < 0) {
1809 error = EINVAL;
1810 goto outdrop;
1811 }
1812 if ((error = vnode_getwithref(vp)) == 0) {
1813 error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context);
1814
1815 (void)vnode_put(vp);
1816 }
1817 goto outdrop;
1818 }
1819
1820 case F_FLUSH_DATA:
1821
1822 if (fp->f_type != DTYPE_VNODE) {
1823 error = EBADF;
1824 goto out;
1825 }
1826 vp = (struct vnode *)fp->f_data;
1827 proc_fdunlock(p);
1828
1829 if ((error = vnode_getwithref(vp)) == 0) {
1830 error = VNOP_FSYNC(vp, MNT_NOWAIT, &context);
1831
1832 (void)vnode_put(vp);
1833 }
1834 goto outdrop;
1835
1836 case F_LOG2PHYS:
1837 case F_LOG2PHYS_EXT: {
1838 struct log2phys l2p_struct = {}; /* structure for allocate command */
1839 int devBlockSize;
1840
1841 off_t file_offset = 0;
1842 size_t a_size = 0;
1843 size_t run = 0;
1844
1845 if (uap->cmd == F_LOG2PHYS_EXT) {
1846 error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
1847 if (error) {
1848 goto out;
1849 }
1850 file_offset = l2p_struct.l2p_devoffset;
1851 } else {
1852 file_offset = fp->f_offset;
1853 }
1854 if (fp->f_type != DTYPE_VNODE) {
1855 error = EBADF;
1856 goto out;
1857 }
1858 vp = (struct vnode *)fp->f_data;
1859 proc_fdunlock(p);
1860 if ((error = vnode_getwithref(vp))) {
1861 goto outdrop;
1862 }
1863 error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
1864 if (error) {
1865 (void)vnode_put(vp);
1866 goto outdrop;
1867 }
1868 error = VNOP_BLKTOOFF(vp, lbn, &offset);
1869 if (error) {
1870 (void)vnode_put(vp);
1871 goto outdrop;
1872 }
1873 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1874 if (uap->cmd == F_LOG2PHYS_EXT) {
1875 if (l2p_struct.l2p_contigbytes < 0) {
1876 vnode_put(vp);
1877 error = EINVAL;
1878 goto outdrop;
1879 }
1880
1881 a_size = (size_t)MIN((uint64_t)l2p_struct.l2p_contigbytes, SIZE_MAX);
1882 } else {
1883 a_size = devBlockSize;
1884 }
1885
1886 error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
1887
1888 (void)vnode_put(vp);
1889
1890 if (!error) {
1891 l2p_struct.l2p_flags = 0; /* for now */
1892 if (uap->cmd == F_LOG2PHYS_EXT) {
1893 l2p_struct.l2p_contigbytes = run - (file_offset - offset);
1894 } else {
1895 l2p_struct.l2p_contigbytes = 0; /* for now */
1896 }
1897
1898 /*
1899 * The block number being -1 suggests that the file offset is not backed
1900 * by any real blocks on-disk. As a result, just let it be passed back up wholesale.
1901 */
1902 if (bn == -1) {
1903 /* Don't multiply it by the block size */
1904 l2p_struct.l2p_devoffset = bn;
1905 } else {
1906 l2p_struct.l2p_devoffset = bn * devBlockSize;
1907 l2p_struct.l2p_devoffset += file_offset - offset;
1908 }
1909 error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
1910 }
1911 goto outdrop;
1912 }
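/*
 * Worked example for the case above (illustrative): with a 4096-byte device
 * block size, a file offset of 10000 maps to logical block 2 (block offset
 * 8192). If VNOP_BLOCKMAP reports physical block 555, the returned
 * l2p_devoffset is 555 * 4096 + (10000 - 8192) = 2275088.
 */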
1913 case F_GETPATH:
1914 case F_GETPATH_NOFIRMLINK: {
1915 char *pathbufp;
1916 int pathlen;
1917
1918 if (fp->f_type != DTYPE_VNODE) {
1919 error = EBADF;
1920 goto out;
1921 }
1922 vp = (struct vnode *)fp->f_data;
1923 proc_fdunlock(p);
1924
1925 pathlen = MAXPATHLEN;
1926 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
1927 if (pathbufp == NULL) {
1928 error = ENOMEM;
1929 goto outdrop;
1930 }
1931 if ((error = vnode_getwithref(vp)) == 0) {
1932 if (uap->cmd == F_GETPATH_NOFIRMLINK) {
1933 error = vn_getpath_ext(vp, NULL, pathbufp, &pathlen, VN_GETPATH_NO_FIRMLINK);
1934 } else {
1935 error = vn_getpath(vp, pathbufp, &pathlen);
1936 }
1937 (void)vnode_put(vp);
1938
1939 if (error == 0) {
1940 error = copyout((caddr_t)pathbufp, argp, pathlen);
1941 }
1942 }
1943 FREE(pathbufp, M_TEMP);
1944 goto outdrop;
1945 }
1946
1947 case F_PATHPKG_CHECK: {
1948 char *pathbufp;
1949 size_t pathlen;
1950
1951 if (fp->f_type != DTYPE_VNODE) {
1952 error = EBADF;
1953 goto out;
1954 }
1955 vp = (struct vnode *)fp->f_data;
1956 proc_fdunlock(p);
1957
1958 pathlen = MAXPATHLEN;
1959 pathbufp = zalloc(ZV_NAMEI);
1960
1961 if ((error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0) {
1962 if ((error = vnode_getwithref(vp)) == 0) {
1963 AUDIT_ARG(text, pathbufp);
1964 error = vn_path_package_check(vp, pathbufp, (int)pathlen, retval);
1965
1966 (void)vnode_put(vp);
1967 }
1968 }
1969 zfree(ZV_NAMEI, pathbufp);
1970 goto outdrop;
1971 }
1972
1973 case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync()
1974 case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZE
1975 case F_BARRIERFSYNC: // fsync + barrier
1976 case F_FREEZE_FS: // freeze all other fs operations for the fs of this fd
1977 case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd
1978 if (fp->f_type != DTYPE_VNODE) {
1979 error = EBADF;
1980 goto out;
1981 }
1982 vp = (struct vnode *)fp->f_data;
1983 proc_fdunlock(p);
1984
1985 if ((error = vnode_getwithref(vp)) == 0) {
1986 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
1987
1988 (void)vnode_put(vp);
1989 }
1990 break;
1991 }
1992
1993 /*
1994 * SPI (private) for opening a file starting from a dir fd
1995 */
1996 case F_OPENFROM: {
1997 struct user_fopenfrom fopen;
1998 struct vnode_attr va;
1999 struct nameidata nd;
2000 int cmode;
2001
2002 /* Check if this isn't a valid file descriptor */
2003 if ((fp->f_type != DTYPE_VNODE) ||
2004 (fp->f_flag & FREAD) == 0) {
2005 error = EBADF;
2006 goto out;
2007 }
2008 vp = (struct vnode *)fp->f_data;
2009 proc_fdunlock(p);
2010
2011 if (vnode_getwithref(vp)) {
2012 error = ENOENT;
2013 goto outdrop;
2014 }
2015
2016 /* Only valid for directories */
2017 if (vp->v_type != VDIR) {
2018 vnode_put(vp);
2019 error = ENOTDIR;
2020 goto outdrop;
2021 }
2022
2023 /*
2024 * Only entitled apps may use the credentials of the thread
2025 * that opened the file descriptor.
2026 * Non-entitled threads will use their own context.
2027 */
2028 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2029 has_entitlement = 1;
2030 }
2031
2032 /* Get flags, mode and pathname arguments. */
2033 if (IS_64BIT_PROCESS(p)) {
2034 error = copyin(argp, &fopen, sizeof(fopen));
2035 } else {
2036 struct user32_fopenfrom fopen32;
2037
2038 error = copyin(argp, &fopen32, sizeof(fopen32));
2039 fopen.o_flags = fopen32.o_flags;
2040 fopen.o_mode = fopen32.o_mode;
2041 fopen.o_pathname = CAST_USER_ADDR_T(fopen32.o_pathname);
2042 }
2043 if (error) {
2044 vnode_put(vp);
2045 goto outdrop;
2046 }
2047 AUDIT_ARG(fflags, fopen.o_flags);
2048 AUDIT_ARG(mode, fopen.o_mode);
2049 VATTR_INIT(&va);
2050 /* Mask off all but regular access permissions */
2051 cmode = ((fopen.o_mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2052 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2053
2054 /* Start the lookup relative to the file descriptor's vnode. */
2055 NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2056 fopen.o_pathname, has_entitlement ? &context : vfs_context_current());
2057 nd.ni_dvp = vp;
2058
2059 error = open1(has_entitlement ? &context : vfs_context_current(),
2060 &nd, fopen.o_flags, &va, fileproc_alloc_init, NULL, retval);
2061
2062 vnode_put(vp);
2063 break;
2064 }
2065 /*
2066 * SPI (private) for unlinking a file starting from a dir fd
2067 */
2068 case F_UNLINKFROM: {
2069 user_addr_t pathname;
2070
2071 /* Check if this isn't a valid file descriptor */
2072 if ((fp->f_type != DTYPE_VNODE) ||
2073 (fp->f_flag & FREAD) == 0) {
2074 error = EBADF;
2075 goto out;
2076 }
2077 vp = (struct vnode *)fp->f_data;
2078 proc_fdunlock(p);
2079
2080 if (vnode_getwithref(vp)) {
2081 error = ENOENT;
2082 goto outdrop;
2083 }
2084
2085 /* Only valid for directories */
2086 if (vp->v_type != VDIR) {
2087 vnode_put(vp);
2088 error = ENOTDIR;
2089 goto outdrop;
2090 }
2091
2092 /*
2093 * Only entitled apps may use the credentials of the thread
2094 * that opened the file descriptor.
2095 * Non-entitled threads will use their own context.
2096 */
2097 if (IOTaskHasEntitlement(current_task(), ACCOUNT_OPENFROM_ENTITLEMENT)) {
2098 has_entitlement = 1;
2099 }
2100
2101 /* Get flags, mode and pathname arguments. */
2102 if (IS_64BIT_PROCESS(p)) {
2103 pathname = (user_addr_t)argp;
2104 } else {
2105 pathname = CAST_USER_ADDR_T(argp);
2106 }
2107
2108 /* Start the lookup relative to the file descriptor's vnode. */
2109 error = unlink1(has_entitlement ? &context : vfs_context_current(),
2110 vp, pathname, UIO_USERSPACE, 0);
2111
2112 vnode_put(vp);
2113 break;
2114 }
2115
2116 case F_ADDSIGS:
2117 case F_ADDFILESIGS:
2118 case F_ADDFILESIGS_FOR_DYLD_SIM:
2119 case F_ADDFILESIGS_RETURN:
2120 case F_ADDFILESIGS_INFO:
2121 {
2122 struct cs_blob *blob = NULL;
2123 struct user_fsignatures fs;
2124 kern_return_t kr;
2125 vm_offset_t kernel_blob_addr;
2126 vm_size_t kernel_blob_size;
2127 int blob_add_flags = 0;
2128 const size_t sizeof_fs = (uap->cmd == F_ADDFILESIGS_INFO ?
2129 offsetof(struct user_fsignatures, fs_cdhash /* first output element */) :
2130 offsetof(struct user_fsignatures, fs_fsignatures_size /* compat */));
2131
2132 if (fp->f_type != DTYPE_VNODE) {
2133 error = EBADF;
2134 goto out;
2135 }
2136 vp = (struct vnode *)fp->f_data;
2137 proc_fdunlock(p);
2138
2139 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2140 blob_add_flags |= MAC_VNODE_CHECK_DYLD_SIM;
2141 if ((p->p_csflags & CS_KILL) == 0) {
2142 proc_lock(p);
2143 p->p_csflags |= CS_KILL;
2144 proc_unlock(p);
2145 }
2146 }
2147
2148 error = vnode_getwithref(vp);
2149 if (error) {
2150 goto outdrop;
2151 }
2152
2153 if (IS_64BIT_PROCESS(p)) {
2154 error = copyin(argp, &fs, sizeof_fs);
2155 } else {
2156 if (uap->cmd == F_ADDFILESIGS_INFO) {
2157 error = EINVAL;
2158 vnode_put(vp);
2159 goto outdrop;
2160 }
2161
2162 struct user32_fsignatures fs32;
2163
2164 error = copyin(argp, &fs32, sizeof(fs32));
2165 fs.fs_file_start = fs32.fs_file_start;
2166 fs.fs_blob_start = CAST_USER_ADDR_T(fs32.fs_blob_start);
2167 fs.fs_blob_size = fs32.fs_blob_size;
2168 }
2169
2170 if (error) {
2171 vnode_put(vp);
2172 goto outdrop;
2173 }
2174
2175 /*
2176 * First check if we have something loaded at this offset
2177 */
2178 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, fs.fs_file_start);
2179 if (blob != NULL) {
2180 /* If this is for dyld_sim revalidate the blob */
2181 if (uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM) {
2182 error = ubc_cs_blob_revalidate(vp, blob, NULL, blob_add_flags, proc_platform(p));
2183 if (error) {
2184 blob = NULL;
2185 if (error != EAGAIN) {
2186 vnode_put(vp);
2187 goto outdrop;
2188 }
2189 }
2190 }
2191 }
2192
2193 if (blob == NULL) {
2194 /*
2195 * An arbitrary limit, to prevent someone from mapping in a 20GB blob. This should cover
2196 * our use cases for the immediate future, but note that at the time of this commit, some
2197 * platforms are nearing 2MB blob sizes (with a prior soft limit of 2.5MB).
2198 *
2199 * We should consider how we can manage this more effectively; the above means that some
2200 * platforms are using megabytes of memory for signing data; it merely hasn't crossed the
2201 * threshold considered ridiculous at the time of this change.
2202 */
2203 #define CS_MAX_BLOB_SIZE (40ULL * 1024ULL * 1024ULL)
2204 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2205 error = E2BIG;
2206 vnode_put(vp);
2207 goto outdrop;
2208 }
2209
2210 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2211 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2212 if (kr != KERN_SUCCESS || kernel_blob_size < fs.fs_blob_size) {
2213 error = ENOMEM;
2214 vnode_put(vp);
2215 goto outdrop;
2216 }
2217
2218 if (uap->cmd == F_ADDSIGS) {
2219 error = copyin(fs.fs_blob_start,
2220 (void *) kernel_blob_addr,
2221 fs.fs_blob_size);
2222 } else { /* F_ADDFILESIGS || F_ADDFILESIGS_RETURN || F_ADDFILESIGS_FOR_DYLD_SIM || F_ADDFILESIGS_INFO */
2223 int resid;
2224
2225 error = vn_rdwr(UIO_READ,
2226 vp,
2227 (caddr_t) kernel_blob_addr,
2228 (int)kernel_blob_size,
2229 fs.fs_file_start + fs.fs_blob_start,
2230 UIO_SYSSPACE,
2231 0,
2232 kauth_cred_get(),
2233 &resid,
2234 p);
2235 if ((error == 0) && resid) {
2236 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2237 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2238 }
2239 }
2240
2241 if (error) {
2242 ubc_cs_blob_deallocate(kernel_blob_addr,
2243 kernel_blob_size);
2244 vnode_put(vp);
2245 goto outdrop;
2246 }
2247
2248 blob = NULL;
2249 error = ubc_cs_blob_add(vp,
2250 proc_platform(p),
2251 CPU_TYPE_ANY, /* not for a specific architecture */
2252 CPU_SUBTYPE_ANY,
2253 fs.fs_file_start,
2254 &kernel_blob_addr,
2255 kernel_blob_size,
2256 NULL,
2257 blob_add_flags,
2258 &blob);
2259
2260 /* ubc_cs_blob_add() has consumed "kernel_blob_addr" if it is now zero */
2261 if (error) {
2262 if (kernel_blob_addr) {
2263 ubc_cs_blob_deallocate(kernel_blob_addr,
2264 kernel_blob_size);
2265 }
2266 vnode_put(vp);
2267 goto outdrop;
2268 } else {
2269 #if CHECK_CS_VALIDATION_BITMAP
2270 ubc_cs_validation_bitmap_allocate( vp );
2271 #endif
2272 }
2273 }
2274
2275 if (uap->cmd == F_ADDFILESIGS_RETURN || uap->cmd == F_ADDFILESIGS_FOR_DYLD_SIM ||
2276 uap->cmd == F_ADDFILESIGS_INFO) {
2277 /*
2278 * The first element of the structure is an
2279 * off_t, which happens to have the same size on
2280 * all architectures. Overwrite it with the end offset.
2281 */
2282 off_t end_offset = 0;
2283 if (blob) {
2284 end_offset = blob->csb_end_offset;
2285 }
2286 error = copyout(&end_offset, argp, sizeof(end_offset));
2287
2288 if (error) {
2289 vnode_put(vp);
2290 goto outdrop;
2291 }
2292 }
2293
2294 if (uap->cmd == F_ADDFILESIGS_INFO) {
2295 /* Return information. What we copy out depends on the size of the
2296 * passed in structure, to keep binary compatibility. */
2297
2298 if (fs.fs_fsignatures_size >= sizeof(struct user_fsignatures)) {
2299 // enough room for fs_cdhash[20]+fs_hash_type
2300
2301 if (blob != NULL) {
2302 error = copyout(blob->csb_cdhash,
2303 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_cdhash),
2304 USER_FSIGNATURES_CDHASH_LEN);
2305 if (error) {
2306 vnode_put(vp);
2307 goto outdrop;
2308 }
2309 int hashtype = cs_hash_type(blob->csb_hashtype);
2310 error = copyout(&hashtype,
2311 (vm_address_t)argp + offsetof(struct user_fsignatures, fs_hash_type),
2312 sizeof(int));
2313 if (error) {
2314 vnode_put(vp);
2315 goto outdrop;
2316 }
2317 }
2318 }
2319 }
2320
2321 (void) vnode_put(vp);
2322 break;
2323 }
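/*
 * Illustrative sketch (not part of the original source): how a loader such
 * as dyld might hand a code-signature load command to the kernel with
 * F_ADDFILESIGS_RETURN.  Field names follow the user_fsignatures copyin
 * above; the offsets shown are hypothetical placeholders.
 *
 *	fsignatures_t siginfo;
 *	siginfo.fs_file_start = slice_offset;               // start of the Mach-O slice in the file
 *	siginfo.fs_blob_start = (void *)(uintptr_t)cs_dataoff; // LC_CODE_SIGNATURE dataoff, relative to the slice
 *	siginfo.fs_blob_size  = cs_datasize;
 *	if (fcntl(fd, F_ADDFILESIGS_RETURN, &siginfo) != -1) {
 *		// on success the kernel overwrites the first element (fs_file_start)
 *		// with the end offset covered by the signature (csb_end_offset)
 *		off_t covered_end = siginfo.fs_file_start;
 *	}
 */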
2324 #if CONFIG_SUPPLEMENTAL_SIGNATURES
2325 case F_ADDFILESUPPL:
2326 {
2327 struct vnode *ivp;
2328 struct cs_blob *blob = NULL;
2329 struct user_fsupplement fs;
2330 int orig_fd;
2331 struct fileproc* orig_fp = NULL;
2332 kern_return_t kr;
2333 vm_offset_t kernel_blob_addr;
2334 vm_size_t kernel_blob_size;
2335
2336 if (!IS_64BIT_PROCESS(p)) {
2337 error = EINVAL;
2338 goto out; // drop fp and unlock fds
2339 }
2340
2341 if (fp->f_type != DTYPE_VNODE) {
2342 error = EBADF;
2343 goto out;
2344 }
2345
2346 error = copyin(argp, &fs, sizeof(fs));
2347 if (error) {
2348 goto out;
2349 }
2350
2351 orig_fd = fs.fs_orig_fd;
2352 if ((error = fp_lookup(p, orig_fd, &orig_fp, 1))) {
2353 printf("CODE SIGNING: Failed to find original file for supplemental signature attachment\n");
2354 goto out;
2355 }
2356
2357 if (orig_fp->f_type != DTYPE_VNODE) {
2358 error = EBADF;
2359 fp_drop(p, orig_fd, orig_fp, 1);
2360 goto out;
2361 }
2362
2363 ivp = (struct vnode *)orig_fp->f_data;
2364
2365 vp = (struct vnode *)fp->f_data;
2366
2367 proc_fdunlock(p);
2368
2369 error = vnode_getwithref(ivp);
2370 if (error) {
2371 fp_drop(p, orig_fd, orig_fp, 0);
2372 goto outdrop; //drop fp
2373 }
2374
2375 error = vnode_getwithref(vp);
2376 if (error) {
2377 vnode_put(ivp);
2378 fp_drop(p, orig_fd, orig_fp, 0);
2379 goto outdrop;
2380 }
2381
2382 if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
2383 error = E2BIG;
2384 goto dropboth; // drop iocounts on vp and ivp, drop orig_fp then drop fp via outdrop
2385 }
2386
2387 kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
2388 kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
2389 if (kr != KERN_SUCCESS) {
2390 error = ENOMEM;
2391 goto dropboth;
2392 }
2393
2394 int resid;
2395 error = vn_rdwr(UIO_READ, vp,
2396 (caddr_t)kernel_blob_addr, (int)kernel_blob_size,
2397 fs.fs_file_start + fs.fs_blob_start,
2398 UIO_SYSSPACE, 0,
2399 kauth_cred_get(), &resid, p);
2400 if ((error == 0) && resid) {
2401 /* kernel_blob_size rounded to a page size, but signature may be at end of file */
2402 memset((void *)(kernel_blob_addr + (kernel_blob_size - resid)), 0x0, resid);
2403 }
2404
2405 if (error) {
2406 ubc_cs_blob_deallocate(kernel_blob_addr,
2407 kernel_blob_size);
2408 goto dropboth;
2409 }
2410
2411 error = ubc_cs_blob_add_supplement(vp, ivp, fs.fs_file_start,
2412 &kernel_blob_addr, kernel_blob_size, &blob);
2413
2414 /* ubc_blob_add_supplement() has consumed kernel_blob_addr if it is zeroed */
2415 if (error) {
2416 if (kernel_blob_addr) {
2417 ubc_cs_blob_deallocate(kernel_blob_addr,
2418 kernel_blob_size);
2419 }
2420 goto dropboth;
2421 }
2422 vnode_put(ivp);
2423 vnode_put(vp);
2424 fp_drop(p, orig_fd, orig_fp, 0);
2425 break;
2426
2427 dropboth:
2428 vnode_put(ivp);
2429 vnode_put(vp);
2430 fp_drop(p, orig_fd, orig_fp, 0);
2431 goto outdrop;
2432 }
2433 #endif
2434 case F_GETCODEDIR:
2435 case F_FINDSIGS: {
2436 error = ENOTSUP;
2437 goto out;
2438 }
2439 case F_CHECK_LV: {
2440 struct fileglob *fg;
2441 fchecklv_t lv = {};
2442
2443 if (fp->f_type != DTYPE_VNODE) {
2444 error = EBADF;
2445 goto out;
2446 }
2447 fg = fp->fp_glob;
2448 proc_fdunlock(p);
2449
2450 if (IS_64BIT_PROCESS(p)) {
2451 error = copyin(argp, &lv, sizeof(lv));
2452 } else {
2453 struct user32_fchecklv lv32 = {};
2454
2455 error = copyin(argp, &lv32, sizeof(lv32));
2456 lv.lv_file_start = lv32.lv_file_start;
2457 lv.lv_error_message = (void *)(uintptr_t)lv32.lv_error_message;
2458 lv.lv_error_message_size = lv32.lv_error_message_size;
2459 }
2460 if (error) {
2461 goto outdrop;
2462 }
2463
2464 #if CONFIG_MACF
2465 error = mac_file_check_library_validation(p, fg, lv.lv_file_start,
2466 (user_long_t)lv.lv_error_message, lv.lv_error_message_size);
2467 #endif
2468
2469 break;
2470 }
2471 case F_GETSIGSINFO: {
2472 struct cs_blob *blob = NULL;
2473 fgetsigsinfo_t sigsinfo = {};
2474
2475 if (fp->f_type != DTYPE_VNODE) {
2476 error = EBADF;
2477 goto out;
2478 }
2479 vp = (struct vnode *)fp->f_data;
2480 proc_fdunlock(p);
2481
2482 error = vnode_getwithref(vp);
2483 if (error) {
2484 goto outdrop;
2485 }
2486
2487 error = copyin(argp, &sigsinfo, sizeof(sigsinfo));
2488 if (error) {
2489 vnode_put(vp);
2490 goto outdrop;
2491 }
2492
2493 blob = ubc_cs_blob_get(vp, CPU_TYPE_ANY, CPU_SUBTYPE_ANY, sigsinfo.fg_file_start);
2494 if (blob == NULL) {
2495 error = ENOENT;
2496 vnode_put(vp);
2497 goto outdrop;
2498 }
2499 switch (sigsinfo.fg_info_request) {
2500 case GETSIGSINFO_PLATFORM_BINARY:
2501 sigsinfo.fg_sig_is_platform = blob->csb_platform_binary;
2502 error = copyout(&sigsinfo.fg_sig_is_platform,
2503 (vm_address_t)argp + offsetof(struct fgetsigsinfo, fg_sig_is_platform),
2504 sizeof(sigsinfo.fg_sig_is_platform));
2505 if (error) {
2506 vnode_put(vp);
2507 goto outdrop;
2508 }
2509 break;
2510 default:
2511 error = EINVAL;
2512 vnode_put(vp);
2513 goto outdrop;
2514 }
2515 vnode_put(vp);
2516 break;
2517 }
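/*
 * Illustrative sketch (not part of the original source): querying whether
 * the signature attached at a given slice offset is a platform signature.
 * Field names follow the fgetsigsinfo copyin/copyout above; the offset is a
 * hypothetical placeholder.
 *
 *	fgetsigsinfo_t info = {
 *		.fg_file_start      = slice_offset,
 *		.fg_info_request    = GETSIGSINFO_PLATFORM_BINARY,
 *		.fg_sig_is_platform = 0,
 *	};
 *	if (fcntl(fd, F_GETSIGSINFO, &info) == 0) {
 *		// info.fg_sig_is_platform is non-zero for platform binaries
 *	}
 */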
2518 #if CONFIG_PROTECT
2519 case F_GETPROTECTIONCLASS: {
2520 if (fp->f_type != DTYPE_VNODE) {
2521 error = EBADF;
2522 goto out;
2523 }
2524 vp = (struct vnode *)fp->f_data;
2525
2526 proc_fdunlock(p);
2527
2528 if (vnode_getwithref(vp)) {
2529 error = ENOENT;
2530 goto outdrop;
2531 }
2532
2533 struct vnode_attr va;
2534
2535 VATTR_INIT(&va);
2536 VATTR_WANTED(&va, va_dataprotect_class);
2537 error = VNOP_GETATTR(vp, &va, &context);
2538 if (!error) {
2539 if (VATTR_IS_SUPPORTED(&va, va_dataprotect_class)) {
2540 *retval = va.va_dataprotect_class;
2541 } else {
2542 error = ENOTSUP;
2543 }
2544 }
2545
2546 vnode_put(vp);
2547 break;
2548 }
2549
2550 case F_SETPROTECTIONCLASS: {
2551 /* tmp must be a valid PROTECTION_CLASS_* */
2552 tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2553
2554 if (fp->f_type != DTYPE_VNODE) {
2555 error = EBADF;
2556 goto out;
2557 }
2558 vp = (struct vnode *)fp->f_data;
2559
2560 proc_fdunlock(p);
2561
2562 if (vnode_getwithref(vp)) {
2563 error = ENOENT;
2564 goto outdrop;
2565 }
2566
2567 /* Only go forward if you have write access */
2568 vfs_context_t ctx = vfs_context_current();
2569 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2570 vnode_put(vp);
2571 error = EBADF;
2572 goto outdrop;
2573 }
2574
2575 struct vnode_attr va;
2576
2577 VATTR_INIT(&va);
2578 VATTR_SET(&va, va_dataprotect_class, tmp);
2579
2580 error = VNOP_SETATTR(vp, &va, ctx);
2581
2582 vnode_put(vp);
2583 break;
2584 }
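/*
 * Illustrative sketch (not part of the original source): reading and then
 * changing a file's data-protection class from userspace.  The class value
 * must be one of the PROTECTION_CLASS_* constants mentioned above; the
 * variable names are hypothetical.
 *
 *	int cur = fcntl(fd, F_GETPROTECTIONCLASS);    // returns the class, or -1/errno
 *	if (cur != -1 && cur != wanted_class) {
 *		if (fcntl(fd, F_SETPROTECTIONCLASS, wanted_class) == -1) {
 *			perror("F_SETPROTECTIONCLASS");  // EBADF without write access, etc.
 *		}
 *	}
 */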
2585
2586 case F_TRANSCODEKEY: {
2587 if (fp->f_type != DTYPE_VNODE) {
2588 error = EBADF;
2589 goto out;
2590 }
2591
2592 vp = (struct vnode *)fp->f_data;
2593 proc_fdunlock(p);
2594
2595 if (vnode_getwithref(vp)) {
2596 error = ENOENT;
2597 goto outdrop;
2598 }
2599
2600 cp_key_t k = {
2601 .len = CP_MAX_WRAPPEDKEYSIZE,
2602 };
2603
2604 MALLOC(k.key, char *, k.len, M_TEMP, M_WAITOK | M_ZERO);
2605
2606 error = VNOP_IOCTL(vp, F_TRANSCODEKEY, (caddr_t)&k, 1, &context);
2607
2608 vnode_put(vp);
2609
2610 if (error == 0) {
2611 error = copyout(k.key, argp, k.len);
2612 *retval = k.len;
2613 }
2614
2615 FREE(k.key, M_TEMP);
2616
2617 break;
2618 }
2619
2620 case F_GETPROTECTIONLEVEL: {
2621 if (fp->f_type != DTYPE_VNODE) {
2622 error = EBADF;
2623 goto out;
2624 }
2625
2626 vp = (struct vnode*) fp->f_data;
2627 proc_fdunlock(p);
2628
2629 if (vnode_getwithref(vp)) {
2630 error = ENOENT;
2631 goto outdrop;
2632 }
2633
2634 error = VNOP_IOCTL(vp, F_GETPROTECTIONLEVEL, (caddr_t)retval, 0, &context);
2635
2636 vnode_put(vp);
2637 break;
2638 }
2639
2640 case F_GETDEFAULTPROTLEVEL: {
2641 if (fp->f_type != DTYPE_VNODE) {
2642 error = EBADF;
2643 goto out;
2644 }
2645
2646 vp = (struct vnode*) fp->f_data;
2647 proc_fdunlock(p);
2648
2649 if (vnode_getwithref(vp)) {
2650 error = ENOENT;
2651 goto outdrop;
2652 }
2653
2654 /*
2655 * if cp_get_major_vers fails, error will be set to proper errno
2656 * and cp_version will still be 0.
2657 */
2658
2659 error = VNOP_IOCTL(vp, F_GETDEFAULTPROTLEVEL, (caddr_t)retval, 0, &context);
2660
2661 vnode_put(vp);
2662 break;
2663 }
2664
2665 #endif /* CONFIG_PROTECT */
2666
2667 case F_MOVEDATAEXTENTS: {
2668 struct fileproc *fp2 = NULL;
2669 struct vnode *src_vp = NULLVP;
2670 struct vnode *dst_vp = NULLVP;
2671 /* We need to grab the 2nd FD out of the arguments before moving on. */
2672 int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
2673
2674 error = priv_check_cred(kauth_cred_get(), PRIV_VFS_MOVE_DATA_EXTENTS, 0);
2675 if (error) {
2676 goto out;
2677 }
2678
2679 if (fp->f_type != DTYPE_VNODE) {
2680 error = EBADF;
2681 goto out;
2682 }
2683
2684 /*
2685 * For now, special case HFS+ and APFS only, since this
2686 * is SPI.
2687 */
2688 src_vp = (struct vnode *)fp->f_data;
2689 if (src_vp->v_tag != VT_HFS && src_vp->v_tag != VT_APFS) {
2690 error = ENOTSUP;
2691 goto out;
2692 }
2693
2694 /*
2695 * Get the references before we start acquiring iocounts on the vnodes,
2696 * while we still hold the proc fd lock
2697 */
2698 if ((error = fp_lookup(p, fd2, &fp2, 1))) {
2699 error = EBADF;
2700 goto out;
2701 }
2702 if (fp2->f_type != DTYPE_VNODE) {
2703 fp_drop(p, fd2, fp2, 1);
2704 error = EBADF;
2705 goto out;
2706 }
2707 dst_vp = (struct vnode *)fp2->f_data;
2708 if (dst_vp->v_tag != VT_HFS && dst_vp->v_tag != VT_APFS) {
2709 fp_drop(p, fd2, fp2, 1);
2710 error = ENOTSUP;
2711 goto out;
2712 }
2713
2714 #if CONFIG_MACF
2715 /* Re-do MAC checks against the new FD, pass in a fake argument */
2716 error = mac_file_check_fcntl(proc_ucred(p), fp2->fp_glob, uap->cmd, 0);
2717 if (error) {
2718 fp_drop(p, fd2, fp2, 1);
2719 goto out;
2720 }
2721 #endif
2722 /* Audit the 2nd FD */
2723 AUDIT_ARG(fd, fd2);
2724
2725 proc_fdunlock(p);
2726
2727 if (vnode_getwithref(src_vp)) {
2728 fp_drop(p, fd2, fp2, 0);
2729 error = ENOENT;
2730 goto outdrop;
2731 }
2732 if (vnode_getwithref(dst_vp)) {
2733 vnode_put(src_vp);
2734 fp_drop(p, fd2, fp2, 0);
2735 error = ENOENT;
2736 goto outdrop;
2737 }
2738
2739 /*
2740 * Basic asserts; validate they are not the same and that
2741 * both live on the same filesystem.
2742 */
2743 if (dst_vp == src_vp) {
2744 vnode_put(src_vp);
2745 vnode_put(dst_vp);
2746 fp_drop(p, fd2, fp2, 0);
2747 error = EINVAL;
2748 goto outdrop;
2749 }
2750
2751 if (dst_vp->v_mount != src_vp->v_mount) {
2752 vnode_put(src_vp);
2753 vnode_put(dst_vp);
2754 fp_drop(p, fd2, fp2, 0);
2755 error = EXDEV;
2756 goto outdrop;
2757 }
2758
2759 /* Now we have a legit pair of FDs. Go to work */
2760
2761 /* Now check for write access to the target files */
2762 if (vnode_authorize(src_vp, NULLVP,
2763 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2764 vnode_put(src_vp);
2765 vnode_put(dst_vp);
2766 fp_drop(p, fd2, fp2, 0);
2767 error = EBADF;
2768 goto outdrop;
2769 }
2770
2771 if (vnode_authorize(dst_vp, NULLVP,
2772 (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
2773 vnode_put(src_vp);
2774 vnode_put(dst_vp);
2775 fp_drop(p, fd2, fp2, 0);
2776 error = EBADF;
2777 goto outdrop;
2778 }
2779
2780 /* Verify that both vps point to files and not directories */
2781 if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
2782 error = EINVAL;
2783 vnode_put(src_vp);
2784 vnode_put(dst_vp);
2785 fp_drop(p, fd2, fp2, 0);
2786 goto outdrop;
2787 }
2788
2789 /*
2790 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
2791 * We'll pass in our special bit indicating that the new behavior is expected
2792 */
2793
2794 error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
2795
2796 vnode_put(src_vp);
2797 vnode_put(dst_vp);
2798 fp_drop(p, fd2, fp2, 0);
2799 break;
2800 }
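/*
 * Illustrative sketch (not part of the original source): swapping the data
 * extents of two regular files on the same HFS+/APFS volume.  The caller
 * needs the PRIV_VFS_MOVE_DATA_EXTENTS privilege and write access to both
 * files; the file names are hypothetical.
 *
 *	int src = open("live.db",    O_RDWR);
 *	int dst = open("rebuilt.db", O_RDWR);
 *	if (src >= 0 && dst >= 0 &&
 *	    fcntl(src, F_MOVEDATAEXTENTS, dst) == -1) {
 *		perror("F_MOVEDATAEXTENTS");   // EXDEV if on different mounts, etc.
 *	}
 */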
2801
2802 /*
2803 * SPI for making a file compressed.
2804 */
2805 case F_MAKECOMPRESSED: {
2806 uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
2807
2808 if (fp->f_type != DTYPE_VNODE) {
2809 error = EBADF;
2810 goto out;
2811 }
2812
2813 vp = (struct vnode*) fp->f_data;
2814 proc_fdunlock(p);
2815
2816 /* get the vnode */
2817 if (vnode_getwithref(vp)) {
2818 error = ENOENT;
2819 goto outdrop;
2820 }
2821
2822 /* Is it a file? */
2823 if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) {
2824 vnode_put(vp);
2825 error = EBADF;
2826 goto outdrop;
2827 }
2828
2829 /* invoke ioctl to pass off to FS */
2830 /* Only go forward if you have write access */
2831 vfs_context_t ctx = vfs_context_current();
2832 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2833 vnode_put(vp);
2834 error = EBADF;
2835 goto outdrop;
2836 }
2837
2838 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context);
2839
2840 vnode_put(vp);
2841 break;
2842 }
2843
2844 /*
2845 * SPI (private) for indicating to a filesystem that subsequent writes to
2846 * the open FD will be written to the Fastflow.
2847 */
2848 case F_SET_GREEDY_MODE:
2849 /* Intentionally drop through to the same handler as F_SETSTATICCONTENT;
2850 * both fcntls pass their argument and selector into VNOP_IOCTL.
2851 */
2852
2853 /*
2854 * SPI (private) for indicating to a filesystem that subsequent writes to
2855 * the open FD will represent static content.
2856 */
2857 case F_SETSTATICCONTENT: {
2858 caddr_t ioctl_arg = NULL;
2859
2860 if (uap->arg) {
2861 ioctl_arg = (caddr_t) 1;
2862 }
2863
2864 if (fp->f_type != DTYPE_VNODE) {
2865 error = EBADF;
2866 goto out;
2867 }
2868 vp = (struct vnode *)fp->f_data;
2869 proc_fdunlock(p);
2870
2871 error = vnode_getwithref(vp);
2872 if (error) {
2873 error = ENOENT;
2874 goto outdrop;
2875 }
2876
2877 /* Only go forward if you have write access */
2878 vfs_context_t ctx = vfs_context_current();
2879 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2880 vnode_put(vp);
2881 error = EBADF;
2882 goto outdrop;
2883 }
2884
2885 error = VNOP_IOCTL(vp, uap->cmd, ioctl_arg, 0, &context);
2886 (void)vnode_put(vp);
2887
2888 break;
2889 }
2890
2891 /*
2892 * SPI (private) for indicating to the lower level storage driver that the
2893 * subsequent writes should be of a particular IO type (burst, greedy, static),
2894 * or other flavors that may be necessary.
2895 */
2896 case F_SETIOTYPE: {
2897 caddr_t param_ptr;
2898 uint32_t param;
2899
2900 if (uap->arg) {
2901 /* extract 32 bits of flags from userland */
2902 param_ptr = (caddr_t) uap->arg;
2903 param = (uint32_t) param_ptr;
2904 } else {
2905 /* If no argument is specified, error out */
2906 error = EINVAL;
2907 goto out;
2908 }
2909
2910 /*
2911 * Validate the different types of flags that can be specified:
2912 * all of them are mutually exclusive for now.
2913 */
2914 switch (param) {
2915 case F_IOTYPE_ISOCHRONOUS:
2916 break;
2917
2918 default:
2919 error = EINVAL;
2920 goto out;
2921 }
2922
2923
2924 if (fp->f_type != DTYPE_VNODE) {
2925 error = EBADF;
2926 goto out;
2927 }
2928 vp = (struct vnode *)fp->f_data;
2929 proc_fdunlock(p);
2930
2931 error = vnode_getwithref(vp);
2932 if (error) {
2933 error = ENOENT;
2934 goto outdrop;
2935 }
2936
2937 /* Only go forward if you have write access */
2938 vfs_context_t ctx = vfs_context_current();
2939 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2940 vnode_put(vp);
2941 error = EBADF;
2942 goto outdrop;
2943 }
2944
2945 error = VNOP_IOCTL(vp, uap->cmd, param_ptr, 0, &context);
2946 (void)vnode_put(vp);
2947
2948 break;
2949 }
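/*
 * Illustrative sketch (not part of the original source): tagging subsequent
 * writes on an fd as isochronous I/O.  Per the validation above,
 * F_IOTYPE_ISOCHRONOUS is currently the only accepted flag, and it is
 * passed by value rather than by pointer.
 *
 *	if (fcntl(fd, F_SETIOTYPE, F_IOTYPE_ISOCHRONOUS) == -1) {
 *		perror("F_SETIOTYPE");   // EINVAL for any other flag value
 *	}
 */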
2950
2951 /*
2952 * Set the vnode pointed to by 'fd'
2953 * and tag it as the (potentially future) backing store
2954 * for another filesystem
2955 */
2956 case F_SETBACKINGSTORE: {
2957 if (fp->f_type != DTYPE_VNODE) {
2958 error = EBADF;
2959 goto out;
2960 }
2961
2962 vp = (struct vnode *)fp->f_data;
2963
2964 if (vp->v_tag != VT_HFS) {
2965 error = EINVAL;
2966 goto out;
2967 }
2968 proc_fdunlock(p);
2969
2970 if (vnode_getwithref(vp)) {
2971 error = ENOENT;
2972 goto outdrop;
2973 }
2974
2975 /* only proceed if you have write access */
2976 vfs_context_t ctx = vfs_context_current();
2977 if (vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
2978 vnode_put(vp);
2979 error = EBADF;
2980 goto outdrop;
2981 }
2982
2983
2984 /* If arg != 0, set, otherwise unset */
2985 if (uap->arg) {
2986 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)1, 0, &context);
2987 } else {
2988 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context);
2989 }
2990
2991 vnode_put(vp);
2992 break;
2993 }
2994
2995 /*
2996 * like F_GETPATH, but special semantics for
2997 * the mobile time machine handler.
2998 */
2999 case F_GETPATH_MTMINFO: {
3000 char *pathbufp;
3001 int pathlen;
3002
3003 if (fp->f_type != DTYPE_VNODE) {
3004 error = EBADF;
3005 goto out;
3006 }
3007 vp = (struct vnode *)fp->f_data;
3008 proc_fdunlock(p);
3009
3010 pathlen = MAXPATHLEN;
3011 MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
3012 if (pathbufp == NULL) {
3013 error = ENOMEM;
3014 goto outdrop;
3015 }
3016 if ((error = vnode_getwithref(vp)) == 0) {
3017 int backingstore = 0;
3018
3019 /* Check for error from vn_getpath before moving on */
3020 if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
3021 if (vp->v_tag == VT_HFS) {
3022 error = VNOP_IOCTL(vp, uap->cmd, (caddr_t) &backingstore, 0, &context);
3023 }
3024 (void)vnode_put(vp);
3025
3026 if (error == 0) {
3027 error = copyout((caddr_t)pathbufp, argp, pathlen);
3028 }
3029 if (error == 0) {
3030 /*
3031 * If the copyout was successful, now check to ensure
3032 * that this vnode is not a BACKINGSTORE vnode. mtmd
3033 * wants the path regardless.
3034 */
3035 if (backingstore) {
3036 error = EBUSY;
3037 }
3038 }
3039 } else {
3040 (void)vnode_put(vp);
3041 }
3042 }
3043 FREE(pathbufp, M_TEMP);
3044 goto outdrop;
3045 }
3046
3047 #if DEBUG || DEVELOPMENT
3048 case F_RECYCLE:
3049 if (fp->f_type != DTYPE_VNODE) {
3050 error = EBADF;
3051 goto out;
3052 }
3053 vp = (struct vnode *)fp->f_data;
3054 proc_fdunlock(p);
3055
3056 vnode_recycle(vp);
3057 break;
3058 #endif
3059
3060 default:
3061 /*
3062 * This is an fcntl() that we do not recognize at this level;
3063 * if this is a vnode, we send it down into the VNOP_IOCTL
3064 * for this vnode; this can include special devices, and will
3065 * effectively overload fcntl() to send ioctl()'s.
3066 */
3067 if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
3068 error = EINVAL;
3069 goto out;
3070 }
3071
3072 /* Catch any now-invalid fcntl() selectors */
3073 switch (uap->cmd) {
3074 case (int)APFSIOC_REVERT_TO_SNAPSHOT:
3075 case (int)FSIOC_FIOSEEKHOLE:
3076 case (int)FSIOC_FIOSEEKDATA:
3077 case (int)FSIOC_CAS_BSDFLAGS:
3078 case HFS_GET_BOOT_INFO:
3079 case HFS_SET_BOOT_INFO:
3080 case FIOPINSWAP:
3081 case F_MARKDEPENDENCY:
3082 case TIOCREVOKE:
3083 case TIOCREVOKECLEAR:
3084 error = EINVAL;
3085 goto out;
3086 default:
3087 break;
3088 }
3089
3090 if (fp->f_type != DTYPE_VNODE) {
3091 error = EBADF;
3092 goto out;
3093 }
3094 vp = (struct vnode *)fp->f_data;
3095 proc_fdunlock(p);
3096
3097 if ((error = vnode_getwithref(vp)) == 0) {
3098 #define STK_PARAMS 128
3099 char stkbuf[STK_PARAMS] = {0};
3100 unsigned int size;
3101 caddr_t data, memp;
3102 /*
3103 * For this to work properly, we have to copy in the
3104 * ioctl() cmd argument if there is one; we must also
3105 * check that a command parameter, if present, does
3106 * not exceed the maximum command length dictated by
3107 * the number of bits we have available in the command
3108 * to represent a structure length. Finally, we have
3109 * to copy the results back out, if it is that type of
3110 * ioctl().
3111 */
3112 size = IOCPARM_LEN(uap->cmd);
3113 if (size > IOCPARM_MAX) {
3114 (void)vnode_put(vp);
3115 error = EINVAL;
3116 break;
3117 }
3118
3119 memp = NULL;
3120 if (size > sizeof(stkbuf)) {
3121 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
3122 if (memp == 0) {
3123 (void)vnode_put(vp);
3124 error = ENOMEM;
3125 goto outdrop;
3126 }
3127 data = memp;
3128 } else {
3129 data = &stkbuf[0];
3130 }
3131
3132 if (uap->cmd & IOC_IN) {
3133 if (size) {
3134 /* structure */
3135 error = copyin(argp, data, size);
3136 if (error) {
3137 (void)vnode_put(vp);
3138 if (memp) {
3139 kheap_free(KHEAP_TEMP, memp, size);
3140 }
3141 goto outdrop;
3142 }
3143
3144 /* Bzero the section beyond that which was needed */
3145 if (size <= sizeof(stkbuf)) {
3146 bzero((((uint8_t*)data) + size), (sizeof(stkbuf) - size));
3147 }
3148 } else {
3149 /* int */
3150 if (is64bit) {
3151 *(user_addr_t *)data = argp;
3152 } else {
3153 *(uint32_t *)data = (uint32_t)argp;
3154 }
3155 }
3156 } else if ((uap->cmd & IOC_OUT) && size) {
3157 /*
3158 * Zero the buffer so the user always
3159 * gets back something deterministic.
3160 */
3161 bzero(data, size);
3162 } else if (uap->cmd & IOC_VOID) {
3163 if (is64bit) {
3164 *(user_addr_t *)data = argp;
3165 } else {
3166 *(uint32_t *)data = (uint32_t)argp;
3167 }
3168 }
3169
3170 error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
3171
3172 (void)vnode_put(vp);
3173
3174 /* Copy any output data to user */
3175 if (error == 0 && (uap->cmd & IOC_OUT) && size) {
3176 error = copyout(data, argp, size);
3177 }
3178 if (memp) {
3179 kheap_free(KHEAP_TEMP, memp, size);
3180 }
3181 }
3182 break;
3183 }
3184
3185 outdrop:
3186 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3187 fp_drop(p, fd, fp, 0);
3188 return error;
3189 out:
3190 fp_drop(p, fd, fp, 1);
3191 proc_fdunlock(p);
3192 return error;
3193 }
3194
3195
3196 /*
3197 * finishdup
3198 *
3199 * Description: Common code for dup, dup2, and fcntl(F_DUPFD).
3200 *
3201 * Parameters: p Process performing the dup
3202 * old The fd to dup
3203 * new The fd to dup it to
3204 * fd_flags Flags to augment the new fd
3205 * retval Pointer to the call return area
3206 *
3207 * Returns: 0 Success
3208 * EBADF
3209 * ENOMEM
3210 *
3211 * Implicit returns:
3212 * *retval (modified) The new descriptor
3213 *
3214 * Locks: Assumes proc_fdlock for process pointing to fdp is held by
3215 * the caller
3216 *
3217 * Notes: This function may drop and reacquire this lock; it is unsafe
3218 * for a caller to assume that other state protected by the lock
3219 * has not been subsequently changed out from under it.
3220 */
3221 int
3222 finishdup(proc_t p,
3223 struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
3224 {
3225 struct fileproc *nfp;
3226 struct fileproc *ofp;
3227 #if CONFIG_MACF
3228 int error;
3229 #endif
3230
3231 #if DIAGNOSTIC
3232 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3233 #endif
3234 if ((ofp = fdp->fd_ofiles[old]) == NULL ||
3235 (fdp->fd_ofileflags[old] & UF_RESERVED)) {
3236 fdrelse(p, new);
3237 return EBADF;
3238 }
3239
3240 #if CONFIG_MACF
3241 error = mac_file_check_dup(proc_ucred(p), ofp->fp_glob, new);
3242 if (error) {
3243 fdrelse(p, new);
3244 return error;
3245 }
3246 #endif
3247
3248 proc_fdunlock(p);
3249
3250 nfp = fileproc_alloc_init(NULL);
3251
3252 proc_fdlock(p);
3253
3254 if (nfp == NULL) {
3255 fdrelse(p, new);
3256 return ENOMEM;
3257 }
3258
3259 fg_ref(ofp->fp_glob);
3260 nfp->fp_glob = ofp->fp_glob;
3261
3262 #if DIAGNOSTIC
3263 if (fdp->fd_ofiles[new] != 0) {
3264 panic("finishdup: overwriting fd_ofiles with new %d", new);
3265 }
3266 if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0) {
3267 panic("finishdup: unreserved fileflags with new %d", new);
3268 }
3269 #endif
3270
3271 if (new > fdp->fd_lastfile) {
3272 fdp->fd_lastfile = new;
3273 }
3274 *fdflags(p, new) |= fd_flags;
3275 procfdtbl_releasefd(p, new, nfp);
3276 *retval = new;
3277 return 0;
3278 }
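/*
 * Illustrative sketch (not part of the original source): the userspace entry
 * points that funnel into finishdup() are dup(2), dup2(2), and fcntl(2) with
 * F_DUPFD / F_DUPFD_CLOEXEC.  All of the resulting descriptors share one
 * fileglob, so the file offset and status flags are common to them.
 *
 *	int a = dup(fd);                        // lowest free fd
 *	int b = fcntl(fd, F_DUPFD, 10);         // lowest free fd >= 10
 *	int c = fcntl(fd, F_DUPFD_CLOEXEC, 0);  // as above, with FD_CLOEXEC set
 *	                                        // (maps to the fd_flags argument here)
 */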
3279
3280
3281 /*
3282 * sys_close
3283 *
3284 * Description: The implementation of the close(2) system call
3285 *
3286 * Parameters: p Process in whose per process file table
3287 * the close is to occur
3288 * uap->fd fd to be closed
3289 * retval <unused>
3290 *
3291 * Returns: 0 Success
3292 * fp_lookup:EBADF Bad file descriptor
3293 * fp_guard_exception:??? Guarded file descriptor
3294 * close_internal:EBADF
3295 * close_internal:??? Anything returnable by a per-fileops
3296 * close function
3297 */
3298 int
3299 sys_close(proc_t p, struct close_args *uap, __unused int32_t *retval)
3300 {
3301 __pthread_testcancel(1);
3302 return close_nocancel(p, uap->fd);
3303 }
3304
3305 int
3306 sys_close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval)
3307 {
3308 return close_nocancel(p, uap->fd);
3309 }
3310
3311 int
3312 close_nocancel(proc_t p, int fd)
3313 {
3314 struct fileproc *fp;
3315
3316 AUDIT_SYSCLOSE(p, fd);
3317
3318 proc_fdlock(p);
3319 if ((fp = fp_get_noref_locked(p, fd)) == NULL) {
3320 proc_fdunlock(p);
3321 return EBADF;
3322 }
3323
3324 if (FP_ISGUARDED(fp, GUARD_CLOSE)) {
3325 int error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE);
3326 proc_fdunlock(p);
3327 return error;
3328 }
3329
3330 return fp_close_and_unlock(p, fd, fp, 0);
3331 }
3332
3333
3334 int
3335 fp_close_and_unlock(proc_t p, int fd, struct fileproc *fp, int flags)
3336 {
3337 struct filedesc *fdp = p->p_fd;
3338 struct fileglob *fg = fp->fp_glob;
3339
3340 #if DIAGNOSTIC
3341 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3342 #endif
3343
3344 /*
3345 * Keep most people from finding the filedesc while we are closing it.
3346 *
3347 * Callers are:
3348 *
3349 * - dup2() which always waits for UF_RESERVED to clear
3350 *
3351 * - close/guarded_close/... who will fail the fileproc lookup if
3352 * UF_RESERVED is set,
3353 *
3354 * - fdexec()/fdfree() who only run once all threads in the proc
3355 * are properly canceled, hence no fileproc in this proc should
3356 * be in flux.
3357 *
3358 * Which means that neither UF_RESERVED nor UF_CLOSING should be set.
3359 *
3360 * Callers of fp_get_noref_locked_with_iocount() can still find
3361 * this entry so that they can drop their I/O reference despite
3362 * not having remembered the fileproc pointer (namely select() and
3363 * file_drop()).
3364 */
3365 if (p->p_fd->fd_ofileflags[fd] & (UF_RESERVED | UF_CLOSING)) {
3366 panic("%s: called with fileproc in flux (%d/:%p)",
3367 __func__, fd, fp);
3368 }
3369 p->p_fd->fd_ofileflags[fd] |= (UF_RESERVED | UF_CLOSING);
3370
3371 if ((fp->fp_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners()) {
3372 proc_fdunlock(p);
3373
3374 if ((FILEGLOB_DTYPE(fg) == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners()) {
3375 /*
3376 * call out to allow 3rd party notification of close.
3377 * Ignore result of kauth_authorize_fileop call.
3378 */
3379 if (vnode_getwithref((vnode_t)fg->fg_data) == 0) {
3380 u_int fileop_flags = 0;
3381 if (fg->fg_flag & FWASWRITTEN) {
3382 fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED;
3383 }
3384 kauth_authorize_fileop(fg->fg_cred, KAUTH_FILEOP_CLOSE,
3385 (uintptr_t)fg->fg_data, (uintptr_t)fileop_flags);
3386 #if CONFIG_MACF
3387 mac_file_notify_close(proc_ucred(p), fp->fp_glob);
3388 #endif
3389 vnode_put((vnode_t)fg->fg_data);
3390 }
3391 }
3392 if (fp->fp_flags & FP_AIOISSUED) {
3393 /*
3394 * cancel all async IO requests that can be cancelled.
3395 */
3396 _aio_close( p, fd );
3397 }
3398
3399 proc_fdlock(p);
3400 }
3401
3402 if (fd < fdp->fd_knlistsize) {
3403 knote_fdclose(p, fd);
3404 }
3405
3406 fileproc_drain(p, fp);
3407
3408 if (flags & FD_DUP2RESV) {
3409 fdp->fd_ofiles[fd] = NULL;
3410 fdp->fd_ofileflags[fd] &= ~(UF_CLOSING | UF_EXCLOSE | UF_FORKCLOSE);
3411 } else {
3412 fdrelse(p, fd);
3413 }
3414
3415 proc_fdunlock(p);
3416
3417 if (ENTR_SHOULDTRACE && FILEGLOB_DTYPE(fg) == DTYPE_SOCKET) {
3418 KERNEL_ENERGYTRACE(kEnTrActKernSocket, DBG_FUNC_END,
3419 fd, 0, (int64_t)VM_KERNEL_ADDRPERM(fg->fg_data));
3420 }
3421
3422 fileproc_free(fp);
3423
3424 return fg_drop(p, fg);
3425 }
3426
3427
3428 /*
3429 * fstat
3430 *
3431 * Description: Return status information about a file descriptor.
3432 *
3433 * Parameters: p The process doing the fstat
3434 * fd The fd to stat
3435 * ub The user stat buffer
3436 * xsecurity The user extended security
3437 * buffer, or 0 if none
3438 * xsecurity_size The size of xsecurity, or 0
3439 * if no xsecurity
3440 * isstat64 Flag to indicate 64 bit version
3441 * for inode size, etc.
3442 *
3443 * Returns: 0 Success
3444 * EBADF
3445 * EFAULT
3446 * fp_lookup:EBADF Bad file descriptor
3447 * vnode_getwithref:???
3448 * copyout:EFAULT
3449 * vnode_getwithref:???
3450 * vn_stat:???
3451 * soo_stat:???
3452 * pipe_stat:???
3453 * pshm_stat:???
3454 * kqueue_stat:???
3455 *
3456 * Notes: Internal implementation for all other fstat() related
3457 * functions
3458 *
3459 * XXX switch on node type is bogus; need a stat in struct
3460 * XXX fileops instead.
3461 */
3462 static int
3463 fstat(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3464 {
3465 struct fileproc *fp;
3466 union {
3467 struct stat sb;
3468 struct stat64 sb64;
3469 } source;
3470 union {
3471 struct user64_stat user64_sb;
3472 struct user32_stat user32_sb;
3473 struct user64_stat64 user64_sb64;
3474 struct user32_stat64 user32_sb64;
3475 } dest;
3476 int error, my_size;
3477 file_type_t type;
3478 caddr_t data;
3479 kauth_filesec_t fsec;
3480 user_size_t xsecurity_bufsize;
3481 vfs_context_t ctx = vfs_context_current();
3482 void * sbptr;
3483
3484
3485 AUDIT_ARG(fd, fd);
3486
3487 if ((error = fp_lookup(p, fd, &fp, 0)) != 0) {
3488 return error;
3489 }
3490 type = fp->f_type;
3491 data = fp->f_data;
3492 fsec = KAUTH_FILESEC_NONE;
3493
3494 sbptr = (void *)&source;
3495
3496 switch (type) {
3497 case DTYPE_VNODE:
3498 if ((error = vnode_getwithref((vnode_t)data)) == 0) {
3499 /*
3500 * If the caller has the file open, and is not
3501 * requesting extended security information, we are
3502 * going to let them get the basic stat information.
3503 */
3504 if (xsecurity == USER_ADDR_NULL) {
3505 error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, 0, ctx,
3506 fp->fp_glob->fg_cred);
3507 } else {
3508 error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, 0, ctx);
3509 }
3510
3511 AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1);
3512 (void)vnode_put((vnode_t)data);
3513 }
3514 break;
3515
3516 #if SOCKETS
3517 case DTYPE_SOCKET:
3518 error = soo_stat((struct socket *)data, sbptr, isstat64);
3519 break;
3520 #endif /* SOCKETS */
3521
3522 case DTYPE_PIPE:
3523 error = pipe_stat((void *)data, sbptr, isstat64);
3524 break;
3525
3526 case DTYPE_PSXSHM:
3527 error = pshm_stat((void *)data, sbptr, isstat64);
3528 break;
3529
3530 case DTYPE_KQUEUE:
3531 error = kqueue_stat((void *)data, sbptr, isstat64, p);
3532 break;
3533
3534 default:
3535 error = EBADF;
3536 goto out;
3537 }
3538 if (error == 0) {
3539 caddr_t sbp;
3540
3541 if (isstat64 != 0) {
3542 source.sb64.st_lspare = 0;
3543 source.sb64.st_qspare[0] = 0LL;
3544 source.sb64.st_qspare[1] = 0LL;
3545
3546 if (IS_64BIT_PROCESS(current_proc())) {
3547 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3548 my_size = sizeof(dest.user64_sb64);
3549 sbp = (caddr_t)&dest.user64_sb64;
3550 } else {
3551 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3552 my_size = sizeof(dest.user32_sb64);
3553 sbp = (caddr_t)&dest.user32_sb64;
3554 }
3555 } else {
3556 source.sb.st_lspare = 0;
3557 source.sb.st_qspare[0] = 0LL;
3558 source.sb.st_qspare[1] = 0LL;
3559 if (IS_64BIT_PROCESS(current_proc())) {
3560 munge_user64_stat(&source.sb, &dest.user64_sb);
3561 my_size = sizeof(dest.user64_sb);
3562 sbp = (caddr_t)&dest.user64_sb;
3563 } else {
3564 munge_user32_stat(&source.sb, &dest.user32_sb);
3565 my_size = sizeof(dest.user32_sb);
3566 sbp = (caddr_t)&dest.user32_sb;
3567 }
3568 }
3569
3570 error = copyout(sbp, ub, my_size);
3571 }
3572
3573 /* caller wants extended security information? */
3574 if (xsecurity != USER_ADDR_NULL) {
3575 /* did we get any? */
3576 if (fsec == KAUTH_FILESEC_NONE) {
3577 if (susize(xsecurity_size, 0) != 0) {
3578 error = EFAULT;
3579 goto out;
3580 }
3581 } else {
3582 /* find the user buffer size */
3583 xsecurity_bufsize = fusize(xsecurity_size);
3584
3585 /* copy out the actual data size */
3586 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3587 error = EFAULT;
3588 goto out;
3589 }
3590
3591 /* if the caller supplied enough room, copy out to it */
3592 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
3593 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3594 }
3595 }
3596 }
3597 out:
3598 fp_drop(p, fd, fp, 0);
3599 if (fsec != NULL) {
3600 kauth_filesec_free(fsec);
3601 }
3602 return error;
3603 }
3604
3605
3606 /*
3607 * sys_fstat_extended
3608 *
3609 * Description: Extended version of fstat supporting returning extended
3610 * security information
3611 *
3612 * Parameters: p The process doing the fstat
3613 * uap->fd The fd to stat
3614 * uap->ub The user stat buffer
3615 * uap->xsecurity The user extended security
3616 * buffer, or 0 if none
3617 * uap->xsecurity_size The size of xsecurity, or 0
3618 *
3619 * Returns: 0 Success
3620 * !0 Errno (see fstat)
3621 */
3622 int
3623 sys_fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval)
3624 {
3625 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0);
3626 }
3627
3628
3629 /*
3630 * sys_fstat
3631 *
3632 * Description: Get file status for the file associated with fd
3633 *
3634 * Parameters: p The process doing the fstat
3635 * uap->fd The fd to stat
3636 * uap->ub The user stat buffer
3637 *
3638 * Returns: 0 Success
3639 * !0 Errno (see fstat)
3640 */
3641 int
3642 sys_fstat(proc_t p, struct fstat_args *uap, __unused int32_t *retval)
3643 {
3644 return fstat(p, uap->fd, uap->ub, 0, 0, 0);
3645 }
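/*
 * Illustrative sketch (not part of the original source): the plain userspace
 * view of this call.  fstat(2) fills a struct stat for any descriptor type
 * handled by the switch in fstat() above (vnode, socket, pipe, POSIX shm,
 * kqueue).
 *
 *	struct stat sb;
 *	if (fstat(fd, &sb) == 0) {
 *		printf("size=%lld mode=%o\n",
 *		    (long long)sb.st_size, sb.st_mode & 07777);
 *	}
 */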
3646
3647
3648 /*
3649 * sys_fstat64_extended
3650 *
3651 * Description: Extended version of fstat64 supporting returning extended
3652 * security information
3653 *
3654 * Parameters: p The process doing the fstat
3655 * uap->fd The fd to stat
3656 * uap->ub The user stat buffer
3657 * uap->xsecurity The user extended security
3658 * buffer, or 0 if none
3659 * uap->xsecurity_size The size of xsecurity, or 0
3660 *
3661 * Returns: 0 Success
3662 * !0 Errno (see fstat)
3663 */
3664 int
3665 sys_fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval)
3666 {
3667 return fstat(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1);
3668 }
3669
3670
3671 /*
3672 * sys_fstat64
3673 *
3674 * Description: Get 64 bit version of the file status for the file associated
3675 * with fd
3676 *
3677 * Parameters: p The process doing the fstat
3678 * uap->fd The fd to stat
3679 * uap->ub The user stat buffer
3680 *
3681 * Returns: 0 Success
3682 * !0 Errno (see fstat)
3683 */
3684 int
3685 sys_fstat64(proc_t p, struct fstat64_args *uap, __unused int32_t *retval)
3686 {
3687 return fstat(p, uap->fd, uap->ub, 0, 0, 1);
3688 }
3689
3690
3691 /*
3692 * sys_fpathconf
3693 *
3694 * Description: Return pathconf information about a file descriptor.
3695 *
3696 * Parameters: p Process making the request
3697 * uap->fd fd to get information about
3698 * uap->name Name of information desired
3699 * retval Pointer to the call return area
3700 *
3701 * Returns: 0 Success
3702 * EINVAL
3703 * fp_lookup:EBADF Bad file descriptor
3704 * vnode_getwithref:???
3705 * vn_pathconf:???
3706 *
3707 * Implicit returns:
3708 * *retval (modified) Returned information (numeric)
3709 */
3710 int
3711 sys_fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval)
3712 {
3713 int fd = uap->fd;
3714 struct fileproc *fp;
3715 struct vnode *vp;
3716 int error = 0;
3717 file_type_t type;
3718 caddr_t data;
3719
3720
3721 AUDIT_ARG(fd, uap->fd);
3722 if ((error = fp_lookup(p, fd, &fp, 0))) {
3723 return error;
3724 }
3725 type = fp->f_type;
3726 data = fp->f_data;
3727
3728 switch (type) {
3729 case DTYPE_SOCKET:
3730 if (uap->name != _PC_PIPE_BUF) {
3731 error = EINVAL;
3732 goto out;
3733 }
3734 *retval = PIPE_BUF;
3735 error = 0;
3736 goto out;
3737
3738 case DTYPE_PIPE:
3739 if (uap->name != _PC_PIPE_BUF) {
3740 error = EINVAL;
3741 goto out;
3742 }
3743 *retval = PIPE_BUF;
3744 error = 0;
3745 goto out;
3746
3747 case DTYPE_VNODE:
3748 vp = (struct vnode *)data;
3749
3750 if ((error = vnode_getwithref(vp)) == 0) {
3751 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3752
3753 error = vn_pathconf(vp, uap->name, retval, vfs_context_current());
3754
3755 (void)vnode_put(vp);
3756 }
3757 goto out;
3758
3759 default:
3760 error = EINVAL;
3761 goto out;
3762 }
3763 /*NOTREACHED*/
3764 out:
3765 fp_drop(p, fd, fp, 0);
3766 return error;
3767 }
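/*
 * Illustrative sketch (not part of the original source): userspace use of
 * fpathconf(2).  For sockets and pipes only _PC_PIPE_BUF is accepted, per
 * the switch above; vnode-backed descriptors are forwarded to vn_pathconf().
 *
 *	long name_max = fpathconf(filefd, _PC_NAME_MAX);
 *	long pipe_buf = fpathconf(pipefd, _PC_PIPE_BUF);
 *	if (name_max == -1) {
 *		// EINVAL for an unsupported name, EBADF for a bad descriptor, ...
 *	}
 */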
3768
3769 /*
3770 * Statistics counter for the number of times a process calling fdalloc()
3771 * has resulted in an expansion of the per process open file table.
3772 *
3773 * XXX This would likely be of more use if it were per process
3774 */
3775 int fdexpand;
3776
3777
3778 /*
3779 * fdalloc
3780 *
3781 * Description: Allocate a file descriptor for the process.
3782 *
3783 * Parameters: p Process to allocate the fd in
3784 * want The fd we would prefer to get
3785 * result Pointer to fd we got
3786 *
3787 * Returns: 0 Success
3788 * EMFILE
3789 * ENOMEM
3790 *
3791 * Implicit returns:
3792 * *result (modified) The fd which was allocated
3793 */
3794 int
3795 fdalloc(proc_t p, int want, int *result)
3796 {
3797 struct filedesc *fdp = p->p_fd;
3798 int i;
3799 int last, numfiles, oldnfiles;
3800 struct fileproc **newofiles, **ofiles;
3801 char *newofileflags;
3802 rlim_t lim;
3803 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3804
3805 nofile = MIN(nofile, INT_MAX);
3806
3807 /*
3808 * Search for a free descriptor starting at the higher
3809 * of want or fd_freefile. If that fails, consider
3810 * expanding the ofile array.
3811 */
3812 #if DIAGNOSTIC
3813 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
3814 #endif
3815
3816 lim = MIN(nofile, maxfilesperproc);
3817 for (;;) {
3818 last = (int)MIN((unsigned int)fdp->fd_nfiles, (unsigned int)lim);
3819 if ((i = want) < fdp->fd_freefile) {
3820 i = fdp->fd_freefile;
3821 }
3822 for (; i < last; i++) {
3823 if (fdp->fd_ofiles[i] == NULL && !(fdp->fd_ofileflags[i] & UF_RESERVED)) {
3824 procfdtbl_reservefd(p, i);
3825 if (i > fdp->fd_lastfile) {
3826 fdp->fd_lastfile = i;
3827 }
3828 if (want <= fdp->fd_freefile) {
3829 fdp->fd_freefile = i;
3830 }
3831 *result = i;
3832 return 0;
3833 }
3834 }
3835
3836 /*
3837 * No space in current array. Expand?
3838 */
3839 if ((rlim_t)fdp->fd_nfiles >= lim) {
3840 return EMFILE;
3841 }
3842 if (fdp->fd_nfiles < NDEXTENT) {
3843 numfiles = NDEXTENT;
3844 } else {
3845 numfiles = 2 * fdp->fd_nfiles;
3846 }
3847 /* Enforce lim */
3848 if ((rlim_t)numfiles > lim) {
3849 numfiles = (int)lim;
3850 }
3851 proc_fdunlock(p);
3852 MALLOC(newofiles, struct fileproc **,
3853 numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
3854 proc_fdlock(p);
3855 if (newofiles == NULL) {
3856 return ENOMEM;
3857 }
3858 if (fdp->fd_nfiles >= numfiles) {
3859 FREE(newofiles, M_OFILETABL);
3860 continue;
3861 }
3862 newofileflags = (char *) &newofiles[numfiles];
3863 /*
3864 * Copy the existing ofile and ofileflags arrays
3865 * and zero the new portion of each array.
3866 */
3867 oldnfiles = fdp->fd_nfiles;
3868 (void) memcpy(newofiles, fdp->fd_ofiles,
3869 oldnfiles * sizeof(*fdp->fd_ofiles));
3870 (void) memset(&newofiles[oldnfiles], 0,
3871 (numfiles - oldnfiles) * sizeof(*fdp->fd_ofiles));
3872
3873 (void) memcpy(newofileflags, fdp->fd_ofileflags,
3874 oldnfiles * sizeof(*fdp->fd_ofileflags));
3875 (void) memset(&newofileflags[oldnfiles], 0,
3876 (numfiles - oldnfiles) *
3877 sizeof(*fdp->fd_ofileflags));
3878 ofiles = fdp->fd_ofiles;
3879 fdp->fd_ofiles = newofiles;
3880 fdp->fd_ofileflags = newofileflags;
3881 fdp->fd_nfiles = numfiles;
3882 FREE(ofiles, M_OFILETABL);
3883 fdexpand++;
3884 }
3885 }
3886
3887
3888 /*
3889 * fdavail
3890 *
3891 * Description: Check to see whether n user file descriptors are available
3892 * to the process p.
3893 *
3894 * Parameters: p Process to check in
3895 * n The number of fd's desired
3896 *
3897 * Returns: 0 No
3898 * 1 Yes
3899 *
3900 * Locks: Assumes proc_fdlock for process is held by the caller
3901 *
3902 * Notes: The answer only remains valid so long as the proc_fdlock is
3903 * held by the caller.
3904 */
3905 int
3906 fdavail(proc_t p, int n)
3907 {
3908 struct filedesc *fdp = p->p_fd;
3909 struct fileproc **fpp;
3910 char *flags;
3911 int i;
3912 int lim;
3913 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
3914
3915 lim = (int)MIN(nofile, maxfilesperproc);
3916 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
3917 return 1;
3918 }
3919 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
3920 flags = &fdp->fd_ofileflags[fdp->fd_freefile];
3921 for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) {
3922 if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) {
3923 return 1;
3924 }
3925 }
3926 return 0;
3927 }
3928
3929
3930 struct fileproc *
3931 fp_get_noref_locked(proc_t p, int fd)
3932 {
3933 struct filedesc *fdp = p->p_fd;
3934 struct fileproc *fp;
3935
3936 if (fd < 0 || fd >= fdp->fd_nfiles ||
3937 (fp = fdp->fd_ofiles[fd]) == NULL ||
3938 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3939 return NULL;
3940 }
3941 return fp;
3942 }
3943
3944 struct fileproc *
3945 fp_get_noref_locked_with_iocount(proc_t p, int fd)
3946 {
3947 struct filedesc *fdp = p->p_fd;
3948 struct fileproc *fp = NULL;
3949
3950 if (fd < 0 || fd >= fdp->fd_nfiles ||
3951 (fp = fdp->fd_ofiles[fd]) == NULL ||
3952 os_ref_get_count(&fp->fp_iocount) <= 1 ||
3953 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
3954 !(fdp->fd_ofileflags[fd] & UF_CLOSING))) {
3955 panic("%s: caller without an iocount on fileproc (%d/:%p)",
3956 __func__, fd, fp);
3957 }
3958
3959 return fp;
3960 }
3961
3962 int
3963 fp_get_ftype(proc_t p, int fd, file_type_t ftype, int err, struct fileproc **fpp)
3964 {
3965 struct filedesc *fdp = p->p_fd;
3966 struct fileproc *fp;
3967
3968 proc_fdlock_spin(p);
3969 if (fd < 0 || fd >= fdp->fd_nfiles ||
3970 (fp = fdp->fd_ofiles[fd]) == NULL ||
3971 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
3972 proc_fdunlock(p);
3973 return EBADF;
3974 }
3975
3976 if (fp->f_type != ftype) {
3977 proc_fdunlock(p);
3978 return err;
3979 }
3980
3981 os_ref_retain_locked(&fp->fp_iocount);
3982 proc_fdunlock(p);
3983
3984 *fpp = fp;
3985 return 0;
3986 }
3987
3988
3989 /*
3990 * fp_getfvp
3991 *
3992 * Description: Get fileproc and vnode pointer for a given fd from the per
3993 * process open file table of the specified process, and if
3994 * successful, increment the fp_iocount
3995 *
3996 * Parameters: p Process in which fd lives
3997 * fd fd to get information for
3998 * resultfp Pointer to result fileproc
3999 * pointer area, or 0 if none
4000 * resultvp Pointer to result vnode pointer
4001 * area, or 0 if none
4002 *
4003 * Returns: 0 Success
4004 * EBADF Bad file descriptor
4005 * ENOTSUP fd does not refer to a vnode
4006 *
4007 * Implicit returns:
4008 * *resultfp (modified) Fileproc pointer
4009 * *resultvp (modified) vnode pointer
4010 *
4011 * Notes: The resultfp and resultvp fields are optional, and may be
4012 * independently specified as NULL to skip returning information
4013 *
4014 * Locks: Internally takes and releases proc_fdlock
4015 */
4016 int
4017 fp_getfvp(proc_t p, int fd, struct fileproc **resultfp, struct vnode **resultvp)
4018 {
4019 struct fileproc *fp;
4020 int error;
4021
4022 error = fp_get_ftype(p, fd, DTYPE_VNODE, ENOTSUP, &fp);
4023 if (error == 0) {
4024 if (resultfp) {
4025 *resultfp = fp;
4026 }
4027 if (resultvp) {
4028 *resultvp = (struct vnode *)fp->f_data;
4029 }
4030 }
4031
4032 return error;
4033 }
4034
4035
4036 /*
4037 * fp_get_pipe_id
4038 *
4039 * Description: Get pipe id for a given fd from the per process open file table
4040 * of the specified process.
4041 *
4042 * Parameters: p Process in which fd lives
4043 * fd fd to get information for
4044 * result_pipe_id Pointer to result pipe id
4045 *
4046 * Returns: 0 Success
4047 * EINVAL NULL pointer arguments passed
4048 * fp_lookup:EBADF Bad file descriptor
4049 * ENOTSUP fd does not refer to a pipe
4050 *
4051 * Implicit returns:
4052 * *result_pipe_id (modified) pipe id
4053 *
4054 * Locks: Internally takes and releases proc_fdlock
4055 */
4056 int
4057 fp_get_pipe_id(proc_t p, int fd, uint64_t *result_pipe_id)
4058 {
4059 struct fileproc *fp = FILEPROC_NULL;
4060 struct fileglob *fg = NULL;
4061 int error = 0;
4062
4063 if (p == NULL || result_pipe_id == NULL) {
4064 return EINVAL;
4065 }
4066
4067 proc_fdlock(p);
4068 if ((error = fp_lookup(p, fd, &fp, 1))) {
4069 proc_fdunlock(p);
4070 return error;
4071 }
4072 fg = fp->fp_glob;
4073
4074 if (FILEGLOB_DTYPE(fg) == DTYPE_PIPE) {
4075 *result_pipe_id = pipe_id((struct pipe*)fg->fg_data);
4076 } else {
4077 error = ENOTSUP;
4078 }
4079
4080 fp_drop(p, fd, fp, 1);
4081 proc_fdunlock(p);
4082 return error;
4083 }
4084
4085
4086 /*
4087 * fp_lookup
4088 *
4089 * Description: Get fileproc pointer for a given fd from the per process
4090 * open file table of the specified process and if successful,
4091 * increment the fp_iocount
4092 *
4093 * Parameters: p Process in which fd lives
4094 * fd fd to get information for
4095 * resultfp Pointer to result fileproc
4096 * pointer area, or 0 if none
4097 * locked !0 if the caller holds the
4098 * proc_fdlock, 0 otherwise
4099 *
4100 * Returns: 0 Success
4101 * EBADF Bad file descriptor
4102 *
4103 * Implicit returns:
4104 * *resultfp (modified) Fileproc pointer
4105 *
4106 * Locks: If the argument 'locked' is non-zero, then the caller is
4107 * expected to have taken and held the proc_fdlock; if it is
4108 * zero, then this routine internally takes and drops this lock.
4109 */
4110 int
4111 fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked)
4112 {
4113 struct filedesc *fdp = p->p_fd;
4114 struct fileproc *fp;
4115
4116 if (!locked) {
4117 proc_fdlock_spin(p);
4118 }
4119 if (fd < 0 || fdp == NULL || fd >= fdp->fd_nfiles ||
4120 (fp = fdp->fd_ofiles[fd]) == NULL ||
4121 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
4122 if (!locked) {
4123 proc_fdunlock(p);
4124 }
4125 return EBADF;
4126 }
4127 os_ref_retain_locked(&fp->fp_iocount);
4128
4129 if (resultfp) {
4130 *resultfp = fp;
4131 }
4132 if (!locked) {
4133 proc_fdunlock(p);
4134 }
4135
4136 return 0;
4137 }
4138
4139
4140 /*
4141 * fp_tryswap
4142 *
4143 * Description: Swap the fileproc pointer for a given fd with a new
4144 * fileproc pointer in the per-process open file table of
4145 * the specified process. The fdlock must be held at entry.
4146 * Iff the swap is successful, the old fileproc pointer is freed.
4147 *
4148 * Parameters: p Process containing the fd
4149 * fd The fd of interest
4150 * nfp Pointer to the newfp
4151 *
4152 * Returns: 0 Success
4153 * EBADF Bad file descriptor
4154 * EINTR Interrupted
4155 * EKEEPLOOKING Other references were active, try again.
4156 */
4157 int
4158 fp_tryswap(proc_t p, int fd, struct fileproc *nfp)
4159 {
4160 struct fileproc *fp;
4161 int error;
4162
4163 proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
4164
4165 if (0 != (error = fp_lookup(p, fd, &fp, 1))) {
4166 return error;
4167 }
4168 /*
4169 * At this point, our caller (change_guardedfd_np) has
4170 * one fp_iocount reference, and we just took another
4171 * one to begin the replacement.
4172 * fp and nfp have a +1 reference from allocation.
4173 * Thus if no-one else is looking, fp_iocount should be 3.
4174 */
4175 if (os_ref_get_count(&fp->fp_iocount) < 3 ||
4176 1 != os_ref_get_count(&nfp->fp_iocount)) {
4177 panic("%s: fp_iocount", __func__);
4178 } else if (3 == os_ref_get_count(&fp->fp_iocount)) {
4179 /* Copy the contents of *fp, preserving the "type" of *nfp */
4180
4181 nfp->fp_flags = (nfp->fp_flags & FP_TYPEMASK) |
4182 (fp->fp_flags & ~FP_TYPEMASK);
4183 os_ref_retain_locked(&nfp->fp_iocount);
4184 os_ref_retain_locked(&nfp->fp_iocount);
4185 nfp->fp_glob = fp->fp_glob;
4186 nfp->fp_wset = fp->fp_wset;
4187
4188 p->p_fd->fd_ofiles[fd] = nfp;
4189 fp_drop(p, fd, nfp, 1);
4190
4191 os_ref_release_live(&fp->fp_iocount);
4192 os_ref_release_live(&fp->fp_iocount);
4193 fileproc_free(fp);
4194 } else {
4195 /*
4196 * Wait for all other active references to evaporate.
4197 */
4198 p->p_fpdrainwait = 1;
4199 error = msleep(&p->p_fpdrainwait, &p->p_fdmlock,
4200 PRIBIO | PCATCH, "tryswap fpdrain", NULL);
4201 if (0 == error) {
4202 /*
4203 * Return an "internal" errno to trigger a full
4204 * reevaluation of the change-guard attempt.
4205 */
4206 error = EKEEPLOOKING;
4207 }
4208 (void) fp_drop(p, fd, fp, 1);
4209 }
4210 return error;
4211 }
4212
4213
4214 /*
4215 * fp_drop
4216 *
4217 * Description: Drop the I/O reference previously taken by calling fp_lookup
4218 * et al.
4219 *
4220 * Parameters: p Process in which the fd lives
4221 * fd fd associated with the fileproc
4222 * fp fileproc on which to set the
4223 * flag and drop the reference
4224 * locked flag to internally take and
4225 * drop proc_fdlock if it is not
4226 * already held by the caller
4227 *
4228 * Returns: 0 Success
4229 * EBADF Bad file descriptor
4230 *
4231 * Locks: This function internally takes and drops the proc_fdlock for
4232 * the supplied process if 'locked' is zero, and assumes that
4233 * the caller already holds this lock if 'locked' is non-zero.
4234 *
4235 * Notes: The fileproc must correspond to the fd in the supplied proc
4236 */
4237 int
4238 fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
4239 {
4240 struct filedesc *fdp = p->p_fd;
4241 int needwakeup = 0;
4242
4243 if (!locked) {
4244 proc_fdlock_spin(p);
4245 }
4246 if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles ||
4247 (fp = fdp->fd_ofiles[fd]) == NULL ||
4248 ((fdp->fd_ofileflags[fd] & UF_RESERVED) &&
4249 !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) {
4250 if (!locked) {
4251 proc_fdunlock(p);
4252 }
4253 return EBADF;
4254 }
4255
4256 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4257 if (fp->fp_flags & FP_SELCONFLICT) {
4258 fp->fp_flags &= ~FP_SELCONFLICT;
4259 }
4260
4261 if (p->p_fpdrainwait) {
4262 p->p_fpdrainwait = 0;
4263 needwakeup = 1;
4264 }
4265 }
4266 if (!locked) {
4267 proc_fdunlock(p);
4268 }
4269 if (needwakeup) {
4270 wakeup(&p->p_fpdrainwait);
4271 }
4272
4273 return 0;
4274 }
4275
4276
4277 /*
4278 * file_vnode
4279 *
4280 * Description: Given an fd, look it up in the current process's per process
4281 * open file table, and return its internal vnode pointer.
4282 *
4283 * Parameters: fd fd to obtain vnode from
4284 * vpp pointer to vnode return area
4285 *
4286 * Returns: 0 Success
4287 * EINVAL The fd does not refer to a
4288 * vnode fileproc entry
4289 * fp_lookup:EBADF Bad file descriptor
4290 *
4291 * Implicit returns:
4292 * *vpp (modified) Returned vnode pointer
4293 *
4294 * Locks: This function internally takes and drops the proc_fdlock for
4295 * the current process
4296 *
4297 * Notes: If successful, this function increments the fp_iocount on the
4298 * fd's corresponding fileproc.
4299 *
4300 * The fileproc referenced is not returned; because of this, care
4301 * must be taken to not drop the last reference (e.g. by closing
4302 * the file). This is inherently unsafe, since the reference may
4303 * not be recoverable from the vnode, if there is a subsequent
4304 * close that destroys the associated fileproc. The caller should
4305 * therefore retain their own reference on the fileproc so that
4306 * the fp_iocount can be dropped subsequently. Failure to do this
4307 * can result in the returned pointer immediately becoming invalid
4308 * following the call.
4309 *
4310 * Use of this function is discouraged.
4311 */
4312 int
4313 file_vnode(int fd, struct vnode **vpp)
4314 {
4315 return file_vnode_withvid(fd, vpp, NULL);
4316 }
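/*
 * Illustrative sketch (not part of the original source): the usage pattern
 * the notes above describe, as it might appear in kernel code that is handed
 * a file descriptor.  Taking a vnode iocount before issuing VNOPs is the
 * caller's responsibility; file_drop() releases the fp_iocount taken here.
 *
 *	struct vnode *vp;
 *	if (file_vnode(fd, &vp) == 0) {
 *		if (vnode_getwithref(vp) == 0) {
 *			// ... operate on vp ...
 *			vnode_put(vp);
 *		}
 *		file_drop(fd);
 *	}
 */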
4317
4318 /*
4319 * file_vnode_withvid
4320 *
4321 * Description: Given an fd, look it up in the current process's per process
4322 * open file table, and return its internal vnode pointer.
4323 *
4324 * Parameters: fd fd to obtain vnode from
4325 * vpp pointer to vnode return area
4326 * vidp pointer to vid of the returned vnode
4327 *
4328 * Returns: 0 Success
4329 * EINVAL The fd does not refer to a
4330 * vnode fileproc entry
4331 * fp_lookup:EBADF Bad file descriptor
4332 *
4333 * Implicit returns:
4334 * *vpp (modified) Returned vnode pointer
4335 *
4336 * Locks: This function internally takes and drops the proc_fdlock for
4337 * the current process
4338 *
4339 * Notes: If successful, this function increments the fp_iocount on the
4340 * fd's corresponding fileproc.
4341 *
4342 * The fileproc referenced is not returned; because of this, care
4343 * must be taken to not drop the last reference (e.g. by closing
4344 * the file). This is inherently unsafe, since the reference may
4345 * not be recoverable from the vnode, if there is a subsequent
4346 * close that destroys the associated fileproc. The caller should
4347 * therefore retain their own reference on the fileproc so that
4348 * the fp_iocount can be dropped subsequently. Failure to do this
4349 * can result in the returned pointer immediately becoming invalid
4350 * following the call.
4351 *
4352 * Use of this function is discouraged.
4353 */
4354 int
4355 file_vnode_withvid(int fd, struct vnode **vpp, uint32_t *vidp)
4356 {
4357 struct fileproc *fp;
4358 int error;
4359
4360 error = fp_get_ftype(current_proc(), fd, DTYPE_VNODE, EINVAL, &fp);
4361 if (error == 0) {
4362 if (vpp) {
4363 *vpp = fp->f_data;
4364 }
4365 if (vidp) {
4366 *vidp = vnode_vid(fp->f_data);
4367 }
4368 }
4369 return error;
4370 }
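
/*
 * Example (illustrative only, not part of the original file): a sketch
 * of the intended file_vnode_withvid()/file_drop() pairing.  The fd
 * value and the use_the_vnode() helper are hypothetical; only the
 * lookup, the vid-checked vnode reference, and the matching
 * file_drop() reflect interfaces defined in this file.
 *
 *	vnode_t vp;
 *	uint32_t vid;
 *
 *	if (file_vnode_withvid(fd, &vp, &vid) == 0) {
 *		if (vnode_getwithvid(vp, vid) == 0) {
 *			use_the_vnode(vp);	// hypothetical consumer
 *			vnode_put(vp);
 *		}
 *		file_drop(fd);	// balances the fp_iocount taken above
 *	}
 */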
4371
4372 /*
4373 * file_socket
4374 *
4375 * Description: Given an fd, look it up in the current process's per process
4376 * open file table, and return its internal socket pointer.
4377 *
4378 * Parameters: fd fd to obtain socket from
4379 * sp pointer to socket return area
4380 *
4381 * Returns: 0 Success
4382 * ENOTSOCK Not a socket
4383 * fp_lookup:EBADF Bad file descriptor
4384 *
4385 * Implicit returns:
4386 * *sp (modified) Returned socket pointer
4387 *
4388 * Locks: This function internally takes and drops the proc_fdlock for
4389 * the current process
4390 *
4391 * Notes: If successful, this function increments the fp_iocount on the
4392 * fd's corresponding fileproc.
4393 *
4394 * The fileproc referenced is not returned; because of this, care
4395 * must be taken to not drop the last reference (e.g. by closing
4396 * the file). This is inherently unsafe, since the reference may
4397 * not be recoverable from the socket if there is a subsequent
4398 * close that destroys the associated fileproc. The caller should
4399 * therefore retain their own reference on the fileproc so that
4400 * the fp_iocount can be dropped subsequently. Failure to do this
4401 * can result in the returned pointer immediately becoming invalid
4402 * following the call.
4403 *
4404 * Use of this function is discouraged.
4405 */
4406 int
4407 file_socket(int fd, struct socket **sp)
4408 {
4409 struct fileproc *fp;
4410 int error;
4411
4412 error = fp_get_ftype(current_proc(), fd, DTYPE_SOCKET, ENOTSOCK, &fp);
4413 if (error == 0) {
4414 if (sp) {
4415 *sp = (struct socket *)fp->f_data;
4416 }
4417 }
4418 return error;
4419 }
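
/*
 * Example (illustrative only): the same pattern applies to
 * file_socket().  The fd value and use_the_socket() are hypothetical;
 * the point is that any use of the returned pointer must be bracketed
 * by the implicit iocount and a matching file_drop().
 *
 *	struct socket *so;
 *
 *	if (file_socket(fd, &so) == 0) {
 *		use_the_socket(so);	// hypothetical consumer
 *		file_drop(fd);
 *	}
 */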
4420
4421
4422 /*
4423 * file_flags
4424 *
4425 * Description: Given an fd, look it up in the current process's per process
4426 * open file table, and return its fileproc's flags field.
4427 *
4428 * Parameters: fd fd whose flags are to be
4429 * retrieved
4430 * flags pointer to flags data area
4431 *
4432 * Returns: 0 Success
4433 * EBADF Bad file descriptor
4435 *
4436 * Implicit returns:
4437 * *flags (modified) Returned flags field
4438 *
4439 * Locks: This function internally takes and drops the proc_fdlock for
4440 * the current process
4441 */
4442 int
4443 file_flags(int fd, int *flags)
4444 {
4445 proc_t p = current_proc();
4446 struct fileproc *fp;
4447 int error = EBADF;
4448
4449 proc_fdlock_spin(p);
4450 fp = fp_get_noref_locked(p, fd);
4451 if (fp) {
4452 *flags = (int)fp->f_flag;
4453 error = 0;
4454 }
4455 proc_fdunlock(p);
4456
4457 return error;
4458 }
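
/*
 * Example (illustrative only): checking whether a hypothetical fd was
 * opened with non-blocking semantics, using the f_flag value returned
 * here.
 *
 *	int flags;
 *
 *	if (file_flags(fd, &flags) == 0 && (flags & FNONBLOCK)) {
 *		// the descriptor behaves as if opened with O_NONBLOCK
 *	}
 */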
4459
4460
4461 /*
4462 * file_drop
4463 *
4464 * Description: Drop an iocount reference on an fd, and wake up any waiters
4465 * for draining (i.e. blocked in fileproc_drain() called during
4466 * the last attempt to close a file).
4467 *
4468 * Parameters: fd fd on which an ioreference is
4469 * to be dropped
4470 *
4471 * Returns: 0 Success
4472 *
4473 * Description: Given an fd, look it up in the current process's per process
4474 * open file table, and drop its fileproc's fp_iocount by one
4475 *
4476 * Notes: This is intended as the corresponding release operation for
4477 * the file_vnode() and file_socket() functions above.
4478 *
4479 * If the caller cannot possibly hold an I/O reference,
4480 * this function will panic the kernel rather than allow
4481 * memory corruption. Callers must therefore only call this
4482 * function after having acquired an I/O reference on the file.
4483 *
4484 * Use of this function is discouraged.
4485 */
4486 int
4487 file_drop(int fd)
4488 {
4489 struct fileproc *fp;
4490 proc_t p = current_proc();
4491 int needwakeup = 0;
4492
4493 proc_fdlock_spin(p);
4494 fp = fp_get_noref_locked_with_iocount(p, fd);
4495
4496 if (1 == os_ref_release_locked(&fp->fp_iocount)) {
4497 if (fp->fp_flags & FP_SELCONFLICT) {
4498 fp->fp_flags &= ~FP_SELCONFLICT;
4499 }
4500
4501 if (p->p_fpdrainwait) {
4502 p->p_fpdrainwait = 0;
4503 needwakeup = 1;
4504 }
4505 }
4506 proc_fdunlock(p);
4507
4508 if (needwakeup) {
4509 wakeup(&p->p_fpdrainwait);
4510 }
4511 return 0;
4512 }
4513
4514
4515
4516 /*
4517 * falloc_withalloc
4518 *
4519 * Create a new open file structure and allocate
4520 * a file descriptor for the process that refers to it.
4521 *
4522 * Returns: 0 Success
4523 *
4524 * Description: Allocate an entry in the per process open file table and
4525 * return the corresponding fileproc and fd.
4526 *
4527 * Parameters: p The process in whose open file
4528 * table the fd is to be allocated
4529 * resultfp Pointer to fileproc pointer
4530 * return area
4531 * resultfd Pointer to fd return area
4532 * ctx VFS context
4533 * fp_zalloc fileproc allocator to use
4534 * crarg allocator args
4535 *
4536 * Returns: 0 Success
4537 * ENFILE Too many open files in system
4538 * fdalloc:EMFILE Too many open files in process
4539 * fdalloc:ENOMEM M_OFILETABL zone exhausted
4540 * ENOMEM fp_zone or fg_zone zone
4541 * exhausted
4542 *
4543 * Implicit returns:
4544 * *resultfp (modified) Returned fileproc pointer
4545 * *resultfd (modified) Returned fd
4546 *
4547 * Notes: This function takes separate process and context arguments
4548 * solely to support kern_exec.c; otherwise, it would take
4549 * neither, and use the vfs_context_current() routine internally.
4550 */
4551 int
4552 falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd,
4553 vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg)
4554 {
4555 struct fileproc *fp;
4556 struct fileglob *fg;
4557 int error, nfd;
4558
4559 /* Make sure we don't go beyond the system-wide limit */
4560 if (nfiles >= maxfiles) {
4561 tablefull("file");
4562 return ENFILE;
4563 }
4564
4565 proc_fdlock(p);
4566
4567 /* fdalloc will make sure the process stays below per-process limit */
4568 if ((error = fdalloc(p, 0, &nfd))) {
4569 proc_fdunlock(p);
4570 return error;
4571 }
4572
4573 #if CONFIG_MACF
4574 error = mac_file_check_create(proc_ucred(p));
4575 if (error) {
4576 proc_fdunlock(p);
4577 return error;
4578 }
4579 #endif
4580
4581 /*
4582 * Allocate the fileproc and fileglob for the new descriptor.
4583 * The fd slot itself has already been reserved by fdalloc()
4584 * above, so these allocations can be performed without
4585 * holding the proc_fdlock.
4586 */
4587 proc_fdunlock(p);
4588
4589 fp = (*fp_zalloc)(crarg);
4590 if (fp == NULL) {
4591 return ENOMEM;
4592 }
4593 fg = zalloc_flags(fg_zone, Z_WAITOK | Z_ZERO);
4594 lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr);
4595
4596 os_ref_retain_locked(&fp->fp_iocount);
4597 os_ref_init_raw(&fg->fg_count, &f_refgrp);
4598 fg->fg_ops = &uninitops;
4599 fp->fp_glob = fg;
4600 #if CONFIG_MACF
4601 mac_file_label_init(fg);
4602 #endif
4603
4604 kauth_cred_ref(ctx->vc_ucred);
4605
4606 fp->f_cred = ctx->vc_ucred;
4607
4608 #if CONFIG_MACF
4609 mac_file_label_associate(fp->f_cred, fg);
4610 #endif
4611
4612 os_atomic_inc(&nfiles, relaxed);
4613
4614 proc_fdlock(p);
4615
4616 p->p_fd->fd_ofiles[nfd] = fp;
4617
4618 proc_fdunlock(p);
4619
4620 if (resultfp) {
4621 *resultfp = fp;
4622 }
4623 if (resultfd) {
4624 *resultfd = nfd;
4625 }
4626
4627 return 0;
4628 }
4629
4630 int
4631 falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
4632 {
4633 return falloc_withalloc(p, resultfp, resultfd, ctx,
4634 fileproc_alloc_init, NULL);
4635 }
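
/*
 * Example (illustrative only): the usual falloc() consumer pattern,
 * loosely modeled on how pipe-style objects are wired up elsewhere in
 * the kernel.  The myops fileops table, the my_object state and
 * init_my_object() are placeholders; fp_free() (defined later in this
 * file) is shown as the unwind path when initialization fails after
 * the descriptor has been reserved.
 *
 *	struct fileproc *fp;
 *	int fd, error;
 *
 *	error = falloc(p, &fp, &fd, vfs_context_current());
 *	if (error) {
 *		return error;
 *	}
 *	if (init_my_object(&my_object) != 0) {	// hypothetical setup step
 *		fp_free(p, fd, fp);
 *		return ENOMEM;
 *	}
 *	fp->f_flag = FREAD | FWRITE;
 *	fp->f_ops = &myops;
 *	fp->f_data = (caddr_t)my_object;
 *	proc_fdlock(p);
 *	procfdtbl_releasefd(p, fd, NULL);
 *	fp_drop(p, fd, fp, 1);
 *	proc_fdunlock(p);
 *	*retval = fd;
 */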
4636
4637 /*
4638 * fdexec
4639 *
4640 * Description: Perform close-on-exec processing for all files in a process
4641 * that are either marked as close-on-exec, or which were in the
4642 * process of being opened at the time of the execve
4643 *
4644 * Also handles the case (via posix_spawn()) where -all-
4645 * files except those marked with "inherit" are treated as
4646 * close-on-exec.
4647 *
4648 * Parameters: p Pointer to process calling
4649 * execve
4650 *
4651 * Returns: void
4652 *
4653 * Locks: This function internally takes and drops proc_fdlock()
4654 * But assumes tables don't grow/change while unlocked.
4655 *
4656 */
4657 void
4658 fdexec(proc_t p, short flags, int self_exec)
4659 {
4660 struct filedesc *fdp = p->p_fd;
4661 int i;
4662 boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
4663 thread_t self = current_thread();
4664 struct uthread *ut = get_bsdthread_info(self);
4665 struct kqworkq *dealloc_kqwq = NULL;
4666
4667 /*
4668 * If the current thread is bound as a workq/workloop
4669 * servicing thread, we need to unbind it first.
4670 */
4671 if (ut->uu_kqr_bound && self_exec) {
4672 kqueue_threadreq_unbind(p, ut->uu_kqr_bound);
4673 }
4674
4675 proc_fdlock(p);
4676
4677 /*
4678 * Deallocate the knotes for this process
4679 * and mark the tables non-existent so
4680 * subsequent kqueue closes go faster.
4681 */
4682 knotes_dealloc(p);
4683 assert(fdp->fd_knlistsize == 0);
4684 assert(fdp->fd_knhashmask == 0);
4685
4686 for (i = fdp->fd_lastfile; i >= 0; i--) {
4687 struct fileproc *fp = fdp->fd_ofiles[i];
4688 char *flagp = &fdp->fd_ofileflags[i];
4689
4690 if (fp && cloexec_default) {
4691 /*
4692 * Reverse the usual semantics of file descriptor
4693 * inheritance - all of them should be closed
4694 * except files marked explicitly as "inherit" and
4695 * not marked close-on-exec.
4696 */
4697 if ((*flagp & (UF_EXCLOSE | UF_INHERIT)) != UF_INHERIT) {
4698 *flagp |= UF_EXCLOSE;
4699 }
4700 *flagp &= ~UF_INHERIT;
4701 }
4702
4703 if (
4704 ((*flagp & (UF_RESERVED | UF_EXCLOSE)) == UF_EXCLOSE)
4705 #if CONFIG_MACF
4706 || (fp && mac_file_check_inherit(proc_ucred(p), fp->fp_glob))
4707 #endif
4708 ) {
4709 fp_close_and_unlock(p, i, fp, 0);
4710 proc_fdlock(p);
4711 }
4712 }
4713
4714 /* release the per-process workq kq */
4715 if (fdp->fd_wqkqueue) {
4716 dealloc_kqwq = fdp->fd_wqkqueue;
4717 fdp->fd_wqkqueue = NULL;
4718 }
4719
4720 proc_fdunlock(p);
4721
4722 /* Anything to free? */
4723 if (dealloc_kqwq) {
4724 kqworkq_dealloc(dealloc_kqwq);
4725 }
4726 }
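
/*
 * Example (illustrative only, user space): the posix_spawn() attribute
 * and file-action pair that exercise the cloexec_default path above.
 * With POSIX_SPAWN_CLOEXEC_DEFAULT set, every descriptor is treated as
 * close-on-exec in the child except those explicitly marked "inherit";
 * pid, path, argv and envp are elided here.
 *
 *	posix_spawnattr_t attr;
 *	posix_spawn_file_actions_t actions;
 *	short flags = 0;
 *
 *	posix_spawnattr_init(&attr);
 *	posix_spawnattr_getflags(&attr, &flags);
 *	posix_spawnattr_setflags(&attr, flags | POSIX_SPAWN_CLOEXEC_DEFAULT);
 *	posix_spawn_file_actions_init(&actions);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDIN_FILENO);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDOUT_FILENO);
 *	posix_spawn_file_actions_addinherit_np(&actions, STDERR_FILENO);
 *	posix_spawn(&pid, path, &actions, &attr, argv, envp);
 */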
4727
4728
4729 /*
4730 * fdcopy
4731 *
4732 * Description: Copy a filedesc structure. This is normally used as part of
4733 * forkproc() when forking a new process, to copy the per process
4734 * open file table over to the new process.
4735 *
4736 * Parameters: p Process whose open file table
4737 * is to be copied (parent)
4738 * uth_cdir Per thread current working
4739 * directory, or NULL
4740 *
4741 * Returns: NULL Copy failed
4742 * !NULL Pointer to new struct filedesc
4743 *
4744 * Locks: This function internally takes and drops proc_fdlock()
4745 *
4746 * Notes: Files are copied directly, ignoring the new resource limits
4747 * for the process that's being copied into. Since the descriptor
4748 * references are just additional references, this does not count
4749 * against the number of open files on the system.
4750 *
4751 * The struct filedesc includes the current working directory,
4752 * and the current root directory, if the process is chroot'ed.
4753 *
4754 * If the fork was called by a thread using a per thread current
4755 * working directory, we inherit the working directory from the
4756 * thread making the call, rather than from the process.
4757 *
4758 * In the case of a failure to obtain a reference, for most cases,
4759 * the file entry will be silently dropped. There's an exception
4760 * for the case of a chroot dir, since a failure to obtain a
4761 * reference there would constitute an "escape" from the chroot
4762 * environment, which must not be allowed. In that case, we will
4763 * deny the fork() operation, rather than allowing the escape.
4764 */
4765 struct filedesc *
4766 fdcopy(proc_t p, vnode_t uth_cdir)
4767 {
4768 struct filedesc *newfdp, *fdp = p->p_fd;
4769 int i;
4770 struct fileproc *ofp, *fp;
4771 vnode_t v_dir;
4772
4773 newfdp = zalloc(fdp_zone);
4774
4775 proc_fdlock(p);
4776
4777 /*
4778 * the FD_CHROOT flag will be inherited via this copy
4779 */
4780 (void) memcpy(newfdp, fdp, sizeof(*newfdp));
4781
4782 /*
4783 * If we are running with per-thread current working directories,
4784 * inherit the new current working directory from the current thread
4785 * instead, before we take our references.
4786 */
4787 if (uth_cdir != NULLVP) {
4788 newfdp->fd_cdir = uth_cdir;
4789 }
4790
4791 /*
4792 * For both fd_cdir and fd_rdir make sure we get
4793 * a valid reference... if we can't, then set
4794 * the pointer(s) to NULL in the child... this
4795 * will keep us from using a non-referenced vp
4796 * and allows us to do the vnode_rele only on
4797 * a properly referenced vp
4798 */
4799 if ((v_dir = newfdp->fd_cdir)) {
4800 if (vnode_getwithref(v_dir) == 0) {
4801 if ((vnode_ref(v_dir))) {
4802 newfdp->fd_cdir = NULL;
4803 }
4804 vnode_put(v_dir);
4805 } else {
4806 newfdp->fd_cdir = NULL;
4807 }
4808 }
4809 if (newfdp->fd_cdir == NULL && fdp->fd_cdir) {
4810 /*
4811 * we couldn't get a new reference on
4812 * the current working directory being
4813 * inherited... we might as well drop
4814 * our reference from the parent also
4815 * since the vnode has gone DEAD making
4816 * it useless... by dropping it we'll
4817 * be that much closer to recycling it
4818 */
4819 vnode_rele(fdp->fd_cdir);
4820 fdp->fd_cdir = NULL;
4821 }
4822
4823 if ((v_dir = newfdp->fd_rdir)) {
4824 if (vnode_getwithref(v_dir) == 0) {
4825 if ((vnode_ref(v_dir))) {
4826 newfdp->fd_rdir = NULL;
4827 }
4828 vnode_put(v_dir);
4829 } else {
4830 newfdp->fd_rdir = NULL;
4831 }
4832 }
4833 /* Coming from a chroot environment and unable to get a reference... */
4834 if (newfdp->fd_rdir == NULL && fdp->fd_rdir) {
4835 proc_fdunlock(p);
4836 /*
4837 * We couldn't get a new reference on
4838 * the chroot directory being
4839 * inherited... this is fatal, since
4840 * otherwise it would constitute an
4841 * escape from a chroot environment by
4842 * the new process.
4843 */
4844 if (newfdp->fd_cdir) {
4845 vnode_rele(newfdp->fd_cdir);
4846 }
4847 zfree(fdp_zone, newfdp);
4848 return NULL;
4849 }
4850
4851 /*
4852 * If the number of open files fits in the internal arrays
4853 * of the open file structure, use them, otherwise allocate
4854 * additional memory for the number of descriptors currently
4855 * in use.
4856 */
4857 if (newfdp->fd_lastfile < NDFILE) {
4858 i = NDFILE;
4859 } else {
4860 /*
4861 * Compute the smallest multiple of NDEXTENT needed
4862 * for the file descriptors currently in use,
4863 * allowing the table to shrink.
4864 */
4865 i = newfdp->fd_nfiles;
4866 while (i > 1 + 2 * NDEXTENT && i > 1 + newfdp->fd_lastfile * 2) {
4867 i /= 2;
4868 }
4869 }
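/*
 * Worked example (assuming the historical NDFILE/NDEXTENT values
 * of 25 and 50): with fd_nfiles == 400 and fd_lastfile == 30, the
 * loop above halves 400 -> 200 -> 100 and then stops, since 100
 * is no longer greater than 1 + 2 * NDEXTENT (101); the child's
 * table is sized at 100 slots instead of the parent's 400.
 */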
4870 proc_fdunlock(p);
4871
4872 MALLOC(newfdp->fd_ofiles, struct fileproc **,
4873 i * OFILESIZE, M_OFILETABL, M_WAITOK);
4874 if (newfdp->fd_ofiles == NULL) {
4875 if (newfdp->fd_cdir) {
4876 vnode_rele(newfdp->fd_cdir);
4877 }
4878 if (newfdp->fd_rdir) {
4879 vnode_rele(newfdp->fd_rdir);
4880 }
4881
4882 zfree(fdp_zone, newfdp);
4883 return NULL;
4884 }
4885 (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE);
4886 proc_fdlock(p);
4887
4888 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
4889 newfdp->fd_nfiles = i;
4890
4891 if (fdp->fd_nfiles > 0) {
4892 struct fileproc **fpp;
4893 char *flags;
4894
4895 (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles,
4896 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofiles));
4897 (void) memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags,
4898 (newfdp->fd_lastfile + 1) * sizeof(*fdp->fd_ofileflags));
4899
4900 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
4901 flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile];
4902 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--, flags--) {
4903 if ((ofp = *fpp) != NULL &&
4904 0 == (ofp->fp_glob->fg_lflags & FG_CONFINED) &&
4905 0 == (*flags & (UF_FORKCLOSE | UF_RESERVED))) {
4906 #if DEBUG
4907 if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) {
4908 panic("complex fileproc");
4909 }
4910 #endif
4911 fp = fileproc_alloc_init(NULL);
4912 if (fp == NULL) {
4913 /*
4914 * XXX no room to copy, unable to
4915 * XXX safely unwind state at present
4916 */
4917 *fpp = NULL;
4918 } else {
4919 fp->fp_flags |=
4920 (ofp->fp_flags & ~FP_TYPEMASK);
4921 fp->fp_glob = ofp->fp_glob;
4922 fg_ref(fp->fp_glob);
4923 *fpp = fp;
4924 }
4925 } else {
4926 *fpp = NULL;
4927 *flags = 0;
4928 }
4929 if (*fpp == NULL) {
4930 if (i == newfdp->fd_lastfile && i > 0) {
4931 newfdp->fd_lastfile--;
4932 }
4933 if (i < newfdp->fd_freefile) {
4934 newfdp->fd_freefile = i;
4935 }
4936 }
4937 }
4938 }
4939
4940 proc_fdunlock(p);
4941
4942 /*
4943 * Initialize knote and kqueue tracking structs
4944 */
4945 newfdp->fd_knlist = NULL;
4946 newfdp->fd_knlistsize = 0;
4947 newfdp->fd_knhash = NULL;
4948 newfdp->fd_knhashmask = 0;
4949 newfdp->fd_kqhash = NULL;
4950 newfdp->fd_kqhashmask = 0;
4951 newfdp->fd_wqkqueue = NULL;
4952 lck_mtx_init(&newfdp->fd_kqhashlock, proc_kqhashlock_grp, proc_lck_attr);
4953 lck_mtx_init(&newfdp->fd_knhashlock, proc_knhashlock_grp, proc_lck_attr);
4954
4955 return newfdp;
4956 }
4957
4958
4959 /*
4960 * fdfree
4961 *
4962 * Description: Release a filedesc (per process open file table) structure;
4963 * this is done on process exit(), or from forkproc_free() if
4964 * the fork fails for some reason subsequent to a successful
4965 * call to fdcopy()
4966 *
4967 * Parameters: p Pointer to process going away
4968 *
4969 * Returns: void
4970 *
4971 * Locks: This function internally takes and drops proc_fdlock()
4972 */
4973 void
4974 fdfree(proc_t p)
4975 {
4976 struct filedesc *fdp;
4977 struct fileproc *fp;
4978 struct kqworkq *dealloc_kqwq = NULL;
4979 int i;
4980
4981 proc_fdlock(p);
4982
4983 if (p == kernproc || NULL == (fdp = p->p_fd)) {
4984 proc_fdunlock(p);
4985 return;
4986 }
4987
4988 extern struct filedesc filedesc0;
4989
4990 if (&filedesc0 == fdp) {
4991 panic("filedesc0");
4992 }
4993
4994 /*
4995 * deallocate all the knotes up front and claim empty
4996 * tables to make any subsequent kqueue closes faster.
4997 */
4998 knotes_dealloc(p);
4999 assert(fdp->fd_knlistsize == 0);
5000 assert(fdp->fd_knhashmask == 0);
5001
5002 /*
5003 * dealloc all workloops that have outstanding retains
5004 * when created with scheduling parameters.
5005 */
5006 kqworkloops_dealloc(p);
5007
5008 /* close file descriptors */
5009 if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) {
5010 for (i = fdp->fd_lastfile; i >= 0; i--) {
5011 if ((fp = fdp->fd_ofiles[i]) != NULL) {
5012 if (fdp->fd_ofileflags[i] & UF_RESERVED) {
5013 panic("fdfree: found fp with UF_RESERVED");
5014 }
5015 fp_close_and_unlock(p, i, fp, 0);
5016 proc_fdlock(p);
5017 }
5018 }
5019 FREE(fdp->fd_ofiles, M_OFILETABL);
5020 fdp->fd_ofiles = NULL;
5021 fdp->fd_nfiles = 0;
5022 }
5023
5024 if (fdp->fd_wqkqueue) {
5025 dealloc_kqwq = fdp->fd_wqkqueue;
5026 fdp->fd_wqkqueue = NULL;
5027 }
5028
5029 proc_fdunlock(p);
5030
5031 if (dealloc_kqwq) {
5032 kqworkq_dealloc(dealloc_kqwq);
5033 }
5034 if (fdp->fd_cdir) {
5035 vnode_rele(fdp->fd_cdir);
5036 }
5037 if (fdp->fd_rdir) {
5038 vnode_rele(fdp->fd_rdir);
5039 }
5040
5041 proc_fdlock_spin(p);
5042 p->p_fd = NULL;
5043 proc_fdunlock(p);
5044
5045 if (fdp->fd_kqhash) {
5046 for (uint32_t j = 0; j <= fdp->fd_kqhashmask; j++) {
5047 assert(LIST_EMPTY(&fdp->fd_kqhash[j]));
5048 }
5049 hashdestroy(fdp->fd_kqhash, M_KQUEUE, fdp->fd_kqhashmask);
5050 }
5051
5052 lck_mtx_destroy(&fdp->fd_kqhashlock, proc_kqhashlock_grp);
5053 lck_mtx_destroy(&fdp->fd_knhashlock, proc_knhashlock_grp);
5054
5055 zfree(fdp_zone, fdp);
5056 }
5057
5058 /*
5059 * fileproc_drain
5060 *
5061 * Description: Drain out pending I/O operations
5062 *
5063 * Parameters: p Process closing this file
5064 * fp fileproc struct for the open
5065 * instance on the file
5066 *
5067 * Returns: void
5068 *
5069 * Locks: Assumes the caller holds the proc_fdlock
5070 *
5071 * Notes: For character devices, this occurs on the last close of the
5072 * device; for all other file descriptors, this occurs on each
5073 * close to prevent fd's from being closed out from under
5074 * operations currently in progress and blocked
5075 *
5076 * See Also: file_vnode(), file_socket(), file_drop(), and the cautions
5077 * regarding their use and interaction with this function.
5078 */
5079 void
5080 fileproc_drain(proc_t p, struct fileproc * fp)
5081 {
5082 struct vfs_context context;
5083 thread_t thread;
5084 bool is_current_proc;
5085
5086 is_current_proc = (p == current_proc());
5087
5088 if (!is_current_proc) {
5089 proc_lock(p);
5090 thread = proc_thread(p); /* XXX */
5091 thread_reference(thread);
5092 proc_unlock(p);
5093 } else {
5094 thread = current_thread();
5095 }
5096
5097 context.vc_thread = thread;
5098 context.vc_ucred = fp->fp_glob->fg_cred;
5099
5100 /* Set the vflag for drain */
5101 fileproc_modify_vflags(fp, FPV_DRAIN, FALSE);
5102
5103 while (os_ref_get_count(&fp->fp_iocount) > 1) {
5104 lck_mtx_convert_spin(&p->p_fdmlock);
5105
5106 fo_drain(fp, &context);
5107 if ((fp->fp_flags & FP_INSELECT) == FP_INSELECT) {
5108 if (waitq_wakeup64_all((struct waitq *)fp->fp_wset, NO_EVENT64,
5109 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5110 panic("bad wait queue for waitq_wakeup64_all %p (fp:%p)", fp->fp_wset, fp);
5111 }
5112 }
5113 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5114 if (waitq_wakeup64_all(&select_conflict_queue, NO_EVENT64,
5115 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES) == KERN_INVALID_ARGUMENT) {
5116 panic("bad select_conflict_queue");
5117 }
5118 }
5119 p->p_fpdrainwait = 1;
5120
5121 msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
5122 }
5123 #if DIAGNOSTIC
5124 if ((fp->fp_flags & FP_INSELECT) != 0) {
5125 panic("FP_INSELECT set on drained fp");
5126 }
5127 #endif
5128 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
5129 fp->fp_flags &= ~FP_SELCONFLICT;
5130 }
5131
5132 if (!is_current_proc) {
5133 thread_deallocate(thread);
5134 }
5135 }
5136
5137
5138 /*
5139 * fp_free
5140 *
5141 * Description: Release the fd and free the fileproc associated with the fd
5142 * in the per process open file table of the specified process;
5143 * these values must correspond.
5144 *
5145 * Parameters: p Process containing fd
5146 * fd fd to be released
5147 * fp fileproc to be freed
5148 */
5149 void
5150 fp_free(proc_t p, int fd, struct fileproc * fp)
5151 {
5152 proc_fdlock_spin(p);
5153 fdrelse(p, fd);
5154 proc_fdunlock(p);
5155
5156 fg_free(fp->fp_glob);
5157 os_ref_release_live(&fp->fp_iocount);
5158 fileproc_free(fp);
5159 }
5160
5161
5162 /*
5163 * sys_flock
5164 *
5165 * Description: Apply an advisory lock on a file descriptor.
5166 *
5167 * Parameters: p Process making request
5168 * uap->fd fd on which the lock is to be
5169 * attempted
5170 * uap->how (Un)Lock bits, including type
5171 * retval Pointer to the call return area
5172 *
5173 * Returns: 0 Success
5174 * fp_getfvp:EBADF Bad file descriptor
5175 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5176 * vnode_getwithref:???
5177 * VNOP_ADVLOCK:???
5178 *
5179 * Implicit returns:
5180 * None; *retval is not used by this system call
5181 *
5182 * Notes: Just attempt to get a record lock of the requested type on
5183 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
5184 */
5185 int
5186 sys_flock(proc_t p, struct flock_args *uap, __unused int32_t *retval)
5187 {
5188 int fd = uap->fd;
5189 int how = uap->how;
5190 struct fileproc *fp;
5191 struct vnode *vp;
5192 struct flock lf;
5193 vfs_context_t ctx = vfs_context_current();
5194 int error = 0;
5195
5196 AUDIT_ARG(fd, uap->fd);
5197 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
5198 return error;
5199 }
5200 if ((error = vnode_getwithref(vp))) {
5201 goto out1;
5202 }
5203 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5204
5205 lf.l_whence = SEEK_SET;
5206 lf.l_start = 0;
5207 lf.l_len = 0;
5208 if (how & LOCK_UN) {
5209 lf.l_type = F_UNLCK;
5210 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
5211 goto out;
5212 }
5213 if (how & LOCK_EX) {
5214 lf.l_type = F_WRLCK;
5215 } else if (how & LOCK_SH) {
5216 lf.l_type = F_RDLCK;
5217 } else {
5218 error = EBADF;
5219 goto out;
5220 }
5221 #if CONFIG_MACF
5222 error = mac_file_check_lock(proc_ucred(p), fp->fp_glob, F_SETLK, &lf);
5223 if (error) {
5224 goto out;
5225 }
5226 #endif
5227 error = VNOP_ADVLOCK(vp, (caddr_t)fp->fp_glob, F_SETLK, &lf,
5228 (how & LOCK_NB ? F_FLOCK : F_FLOCK | F_WAIT),
5229 ctx, NULL);
5230 if (!error) {
5231 os_atomic_or(&fp->fp_glob->fg_flag, FWASLOCKED, relaxed);
5232 }
5233 out:
5234 (void)vnode_put(vp);
5235 out1:
5236 fp_drop(p, fd, fp, 0);
5237 return error;
5238 }
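
/*
 * Example (illustrative only, user space): the flock() usage that ends
 * up in this handler.  LOCK_NB corresponds to the absence of F_WAIT
 * above, so the non-blocking attempt fails with EWOULDBLOCK while the
 * lock is held by someone else; the path name is hypothetical.
 *
 *	int fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0644);
 *
 *	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
 *		// another descriptor holds the lock
 *	}
 *	// ... critical section ...
 *	flock(fd, LOCK_UN);
 */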
5239
5240 /*
5241 * sys_fileport_makeport
5242 *
5243 * Description: Obtain a Mach send right for a given file descriptor.
5244 *
5245 * Parameters: p Process calling fileport
5246 * uap->fd The fd to reference
5247 * uap->portnamep User address at which to place port name.
5248 *
5249 * Returns: 0 Success.
5250 * EBADF Bad file descriptor.
5251 * EINVAL File descriptor had type that cannot be sent, misc. other errors.
5252 * EFAULT Address at which to store port name is not valid.
5253 * EAGAIN Resource shortage.
5254 *
5255 * Implicit returns:
5256 * On success, name of send right is stored at user-specified address.
5257 */
5258 int
5259 sys_fileport_makeport(proc_t p, struct fileport_makeport_args *uap,
5260 __unused int *retval)
5261 {
5262 int err;
5263 int fd = uap->fd;
5264 user_addr_t user_portaddr = uap->portnamep;
5265 struct fileproc *fp = FILEPROC_NULL;
5266 struct fileglob *fg = NULL;
5267 ipc_port_t fileport;
5268 mach_port_name_t name = MACH_PORT_NULL;
5269
5270 proc_fdlock(p);
5271 err = fp_lookup(p, fd, &fp, 1);
5272 if (err != 0) {
5273 goto out_unlock;
5274 }
5275
5276 fg = fp->fp_glob;
5277 if (!fg_sendable(fg)) {
5278 err = EINVAL;
5279 goto out_unlock;
5280 }
5281
5282 if (FP_ISGUARDED(fp, GUARD_FILEPORT)) {
5283 err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT);
5284 goto out_unlock;
5285 }
5286
5287 proc_fdunlock(p);
5288
5289 /* Allocate and initialize a port */
5290 fileport = fileport_alloc(fg);
5291 if (fileport == IPC_PORT_NULL) {
5292 err = EAGAIN;
5293 goto out;
5294 }
5295
5296 /* Dropped when port is deallocated */
5297 fg_ref(fg);
5298
5299 /* Add an entry. Deallocates port on failure. */
5300 name = ipc_port_copyout_send(fileport, get_task_ipcspace(p->task));
5301 if (!MACH_PORT_VALID(name)) {
5302 err = EINVAL;
5303 goto out;
5304 }
5305
5306 err = copyout(&name, user_portaddr, sizeof(mach_port_name_t));
5307 if (err != 0) {
5308 goto out;
5309 }
5310
5311 /* Tag the fileglob for debugging purposes */
5312 lck_mtx_lock_spin(&fg->fg_lock);
5313 fg->fg_lflags |= FG_PORTMADE;
5314 lck_mtx_unlock(&fg->fg_lock);
5315
5316 fp_drop(p, fd, fp, 0);
5317
5318 return 0;
5319
5320 out_unlock:
5321 proc_fdunlock(p);
5322 out:
5323 if (MACH_PORT_VALID(name)) {
5324 /* Don't care if another thread races us to deallocate the entry */
5325 (void) mach_port_deallocate(get_task_ipcspace(p->task), name);
5326 }
5327
5328 if (fp != FILEPROC_NULL) {
5329 fp_drop(p, fd, fp, 0);
5330 }
5331
5332 return err;
5333 }
5334
5335 void
5336 fileport_releasefg(struct fileglob *fg)
5337 {
5338 (void)fg_drop(PROC_NULL, fg);
5339 }
5340
5341 /*
5342 * fileport_makefd
5343 *
5344 * Description: Obtain the file descriptor for a given Mach send right.
5345 *
5346 * Returns: 0 Success
5347 * EINVAL Invalid Mach port name, or port is not for a file.
5348 * fdalloc:EMFILE
5349 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5350 *
5351 * Implicit returns:
5352 * *retval (modified) The new descriptor
5353 */
5354 int
5355 fileport_makefd(proc_t p, ipc_port_t port, int uf_flags, int *retval)
5356 {
5357 struct fileglob *fg;
5358 struct fileproc *fp = FILEPROC_NULL;
5359 int fd;
5360 int err;
5361
5362 fg = fileport_port_to_fileglob(port);
5363 if (fg == NULL) {
5364 err = EINVAL;
5365 goto out;
5366 }
5367
5368 fp = fileproc_alloc_init(NULL);
5369 if (fp == FILEPROC_NULL) {
5370 err = ENOMEM;
5371 goto out;
5372 }
5373
5374 proc_fdlock(p);
5375 err = fdalloc(p, 0, &fd);
5376 if (err != 0) {
5377 proc_fdunlock(p);
5378 goto out;
5379 }
5380 if (uf_flags) {
5381 *fdflags(p, fd) |= uf_flags;
5382 }
5383
5384 fp->fp_glob = fg;
5385 fg_ref(fg);
5386
5387 procfdtbl_releasefd(p, fd, fp);
5388 proc_fdunlock(p);
5389
5390 *retval = fd;
5391 err = 0;
5392 out:
5393 if ((fp != NULL) && (0 != err)) {
5394 fileproc_free(fp);
5395 }
5396
5397 return err;
5398 }
5399
5400 /*
5401 * sys_fileport_makefd
5402 *
5403 * Description: Obtain the file descriptor for a given Mach send right.
5404 *
5405 * Parameters: p Process calling fileport
5406 * uap->port Name of send right to file port.
5407 *
5408 * Returns: 0 Success
5409 * EINVAL Invalid Mach port name, or port is not for a file.
5410 * fdalloc:EMFILE
5411 * fdalloc:ENOMEM Unable to allocate fileproc or extend file table.
5412 *
5413 * Implicit returns:
5414 * *retval (modified) The new descriptor
5415 */
5416 int
5417 sys_fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
5418 {
5419 ipc_port_t port = IPC_PORT_NULL;
5420 mach_port_name_t send = uap->port;
5421 kern_return_t res;
5422 int err;
5423
5424 res = ipc_object_copyin(get_task_ipcspace(p->task),
5425 send, MACH_MSG_TYPE_COPY_SEND, &port, 0, NULL, IPC_KMSG_FLAGS_ALLOW_IMMOVABLE_SEND);
5426
5427 if (res == KERN_SUCCESS) {
5428 err = fileport_makefd(p, port, UF_EXCLOSE, retval);
5429 } else {
5430 err = EINVAL;
5431 }
5432
5433 if (IPC_PORT_NULL != port) {
5434 ipc_port_release_send(port);
5435 }
5436
5437 return err;
5438 }
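
/*
 * Example (illustrative only, user space): the fileport round trip
 * implemented by these two system calls.  The Libsystem wrappers
 * fileport_makeport() and fileport_makefd() are assumed here; in
 * practice the port is handed to another process (e.g. over Mach IPC
 * or XPC) before being converted back into a descriptor.
 *
 *	mach_port_t port = MACH_PORT_NULL;
 *	int newfd;
 *
 *	if (fileport_makeport(fd, &port) == 0) {
 *		// ... transfer "port" to the receiving process ...
 *		newfd = fileport_makefd(port);	// in the receiver
 *		mach_port_deallocate(mach_task_self(), port);
 *	}
 */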
5439
5440
5441 /*
5442 * dupfdopen
5443 *
5444 * Description: Duplicate the specified descriptor to a free descriptor;
5445 * this is the second half of fdopen(), above.
5446 *
5447 * Parameters: fdp filedesc pointer to fill in
5448 * indx fd to dup to
5449 * dfd fd to dup from
5450 * flags mode bits to set on new fd
5451 * error command code
5452 *
5453 * Returns: 0 Success
5454 * EBADF Source fd is bad
5455 * EACCES Requested mode not allowed
5456 * EPERM Source fd is guarded against dup
5457 * !0 'error', if not ENODEV
5458 *
5459 * Notes: XXX This is not thread safe; see fdopen() above
5460 */
5461 int
5462 dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
5463 {
5464 struct fileproc *wfp;
5465 struct fileproc *fp;
5466 #if CONFIG_MACF
5467 int myerror;
5468 #endif
5469 proc_t p = current_proc();
5470
5471 /*
5472 * If the to-be-dup'd fd number is greater than the allowed number
5473 * of file descriptors, or the fd to be dup'd has already been
5474 * closed, reject. Note, check for new == old is necessary as
5475 * falloc could allocate an already closed to-be-dup'd descriptor
5476 * as the new descriptor.
5477 */
5478 proc_fdlock(p);
5479
5480 fp = fdp->fd_ofiles[indx];
5481 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
5482 (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp ||
5483 (fdp->fd_ofileflags[dfd] & UF_RESERVED)) {
5484 proc_fdunlock(p);
5485 return EBADF;
5486 }
5487 #if CONFIG_MACF
5488 myerror = mac_file_check_dup(proc_ucred(p), wfp->fp_glob, dfd);
5489 if (myerror) {
5490 proc_fdunlock(p);
5491 return myerror;
5492 }
5493 #endif
5494 /*
5495 * Only one case is of interest here:
5496 *
5497 * For ENODEV simply dup (dfd) to file descriptor
5498 * (indx) and return.
5499 *
5500 * (The historical ENXIO case, which stole the file structure
5501 * away from (dfd) and stored it in (indx), is no longer
5502 * handled here.)
5503 *
5504 * Any other error code is just returned.
5505 */
5506 switch (error) {
5507 case ENODEV:
5508 if (FP_ISGUARDED(wfp, GUARD_DUP)) {
5509 proc_fdunlock(p);
5510 return EPERM;
5511 }
5512
5513 /*
5514 * Check that the mode the file is being opened for is a
5515 * subset of the mode of the existing descriptor.
5516 */
5517 if (((flags & (FREAD | FWRITE)) | wfp->f_flag) != wfp->f_flag) {
5518 proc_fdunlock(p);
5519 return EACCES;
5520 }
5521 if (indx > fdp->fd_lastfile) {
5522 fdp->fd_lastfile = indx;
5523 }
5524
5525 if (fp->fp_glob) {
5526 fg_free(fp->fp_glob);
5527 }
5528 fg_ref(wfp->fp_glob);
5529 fp->fp_glob = wfp->fp_glob;
5530
5531 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
5532 ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0);
5533
5534 proc_fdunlock(p);
5535 return 0;
5536
5537 default:
5538 proc_fdunlock(p);
5539 return error;
5540 }
5541 /* NOTREACHED */
5542 }
5543
5544
5545 /*
5546 * fo_read
5547 *
5548 * Description: Generic fileops read indirected through the fileops pointer
5549 * in the fileproc structure
5550 *
5551 * Parameters: fp fileproc structure pointer
5552 * uio user I/O structure pointer
5553 * flags FOF_ flags
5554 * ctx VFS context for operation
5555 *
5556 * Returns: 0 Success
5557 * !0 Errno from read
5558 */
5559 int
5560 fo_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5561 {
5562 return (*fp->f_ops->fo_read)(fp, uio, flags, ctx);
5563 }
5564
5565 int
5566 fo_no_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5567 {
5568 #pragma unused(fp, uio, flags, ctx)
5569 return ENXIO;
5570 }
5571
5572
5573 /*
5574 * fo_write
5575 *
5576 * Description: Generic fileops write indirected through the fileops pointer
5577 * in the fileproc structure
5578 *
5579 * Parameters: fp fileproc structure pointer
5580 * uio user I/O structure pointer
5581 * flags FOF_ flags
5582 * ctx VFS context for operation
5583 *
5584 * Returns: 0 Success
5585 * !0 Errno from write
5586 */
5587 int
5588 fo_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5589 {
5590 return (*fp->f_ops->fo_write)(fp, uio, flags, ctx);
5591 }
5592
5593 int
5594 fo_no_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
5595 {
5596 #pragma unused(fp, uio, flags, ctx)
5597 return ENXIO;
5598 }
5599
5600
5601 /*
5602 * fo_ioctl
5603 *
5604 * Description: Generic fileops ioctl indirected through the fileops pointer
5605 * in the fileproc structure
5606 *
5607 * Parameters: fp fileproc structure pointer
5608 * com ioctl command
5609 * data pointer to internalized copy
5610 * of user space ioctl command
5611 * parameter data in kernel space
5612 * ctx VFS context for operation
5613 *
5614 * Returns: 0 Success
5615 * !0 Errno from ioctl
5616 *
5617 * Locks: The caller is assumed to have held the proc_fdlock; this
5618 * function releases and reacquires this lock. If the caller
5619 * accesses data protected by this lock prior to calling this
5620 * function, it will need to revalidate/reacquire any cached
5621 * protected data obtained prior to the call.
5622 */
5623 int
5624 fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5625 {
5626 int error;
5627
5628 proc_fdunlock(vfs_context_proc(ctx));
5629 error = (*fp->f_ops->fo_ioctl)(fp, com, data, ctx);
5630 proc_fdlock(vfs_context_proc(ctx));
5631 return error;
5632 }
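
/*
 * Example (illustrative only): because fo_ioctl() drops and reacquires
 * the proc_fdlock, a caller that cached lock-protected state must
 * revalidate it after the call.  fp_lookup() and fp_drop() are the
 * real interfaces from this file; fd, com, data and ctx are assumed.
 *
 *	proc_fdlock(p);
 *	if (fp_lookup(p, fd, &fp, 1) == 0) {	// locked lookup, takes an iocount
 *		error = fo_ioctl(fp, com, data, ctx);
 *		// the fdlock was dropped and retaken inside fo_ioctl(), so
 *		// any filedesc state read before the call is now stale
 *		fp_drop(p, fd, fp, 1);
 *	}
 *	proc_fdunlock(p);
 */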
5633
5634 int
5635 fo_no_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
5636 {
5637 #pragma unused(fp, com, data, ctx)
5638 return ENOTTY;
5639 }
5640
5641
5642 /*
5643 * fo_select
5644 *
5645 * Description: Generic fileops select indirected through the fileops pointer
5646 * in the fileproc structure
5647 *
5648 * Parameters: fp fileproc structure pointer
5649 * which select which
5650 * wql pointer to wait queue list
5651 * ctx VFS context for operation
5652 *
5653 * Returns: 0 Success
5654 * !0 Errno from select
5655 */
5656 int
5657 fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5658 {
5659 return (*fp->f_ops->fo_select)(fp, which, wql, ctx);
5660 }
5661
5662 int
5663 fo_no_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx)
5664 {
5665 #pragma unused(fp, which, wql, ctx)
5666 return ENOTSUP;
5667 }
5668
5669
5670 /*
5671 * fo_close
5672 *
5673 * Description: Generic fileops close indirected through the fileops pointer
5674 * in the fileproc structure
5675 *
5676 * Parameters: fg fileglob structure pointer for
5677 * file to close
5678 * ctx VFS context for operation
5679 *
5680 * Returns: 0 Success
5681 * !0 Errno from close
5682 */
5683 int
5684 fo_close(struct fileglob *fg, vfs_context_t ctx)
5685 {
5686 return (*fg->fg_ops->fo_close)(fg, ctx);
5687 }
5688
5689
5690 /*
5691 * fo_drain
5692 *
5693 * Description: Generic fileops drain indirected through the fileops
5694 * pointer in the fileproc structure
5695 *
5696 * Parameters: fp fileproc structure pointer
5697 * ctx VFS context for operation
5698 *
5699 * Returns: 0 Success
5700 * !0 errno from drain
5701 */
5702 int
5703 fo_drain(struct fileproc *fp, vfs_context_t ctx)
5704 {
5705 return (*fp->f_ops->fo_drain)(fp, ctx);
5706 }
5707
5708 int
5709 fo_no_drain(struct fileproc *fp, vfs_context_t ctx)
5710 {
5711 #pragma unused(fp, ctx)
5712 return ENOTSUP;
5713 }
5714
5715
5716 /*
5717 * fo_kqfilter
5718 *
5719 * Description: Generic fileops kqueue filter indirected through the fileops
5720 * pointer in the fileproc structure
5721 *
5722 * Parameters: fp fileproc structure pointer
5723 * kn pointer to knote to filter on
5724 *
5725 * Returns: (kn->kn_flags & EV_ERROR) error in kn->kn_data
5726 * 0 Filter is not active
5727 * !0 Filter is active
5728 */
5729 int
5730 fo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5731 {
5732 return (*fp->f_ops->fo_kqfilter)(fp, kn, kev);
5733 }
5734
5735 int
5736 fo_no_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
5737 {
5738 #pragma unused(fp, kev)
5739 knote_set_error(kn, ENOTSUP);
5740 return 0;
5741 }
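
/*
 * Example (illustrative only): a minimal fileops table that uses the
 * fo_no_*() defaults above for operations a hypothetical descriptor
 * type does not support.  The field names assume the struct fileops
 * layout in sys/file_internal.h; myfile_read, myfile_close and the
 * DTYPE_ value are placeholders.
 *
 *	static const struct fileops myfileops = {
 *		.fo_type     = DTYPE_PIPE,	// placeholder type
 *		.fo_read     = myfile_read,
 *		.fo_write    = fo_no_write,
 *		.fo_ioctl    = fo_no_ioctl,
 *		.fo_select   = fo_no_select,
 *		.fo_close    = myfile_close,
 *		.fo_drain    = fo_no_drain,
 *		.fo_kqfilter = fo_no_kqfilter,
 *	};
 */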
5742
5743
5744 struct fileproc *
5745 fileproc_alloc_init(__unused void *arg)
5746 {
5747 struct fileproc *fp = zalloc_flags(fp_zone, Z_WAITOK | Z_ZERO);
5748
5749 os_ref_init(&fp->fp_iocount, &f_refgrp);
5750 return fp;
5751 }
5752
5753
5754 void
5755 fileproc_free(struct fileproc *fp)
5756 {
5757 os_ref_count_t __unused refc = os_ref_release(&fp->fp_iocount);
5758 #if DEVELOPMENT || DEBUG
5759 if (0 != refc) {
5760 panic("%s: pid %d refc: %u != 0",
5761 __func__, proc_pid(current_proc()), refc);
5762 }
5763 #endif
5764 switch (FILEPROC_TYPE(fp)) {
5765 case FTYPE_SIMPLE:
5766 zfree(fp_zone, fp);
5767 break;
5768 case FTYPE_GUARDED:
5769 guarded_fileproc_free(fp);
5770 break;
5771 default:
5772 panic("%s: corrupt fp %p flags %x", __func__, fp, fp->fp_flags);
5773 }
5774 }
5775
5776 void
5777 fileproc_modify_vflags(struct fileproc *fp, fileproc_vflags_t vflags, boolean_t clearflags)
5778 {
5779 if (clearflags) {
5780 os_atomic_andnot(&fp->fp_vflags, vflags, relaxed);
5781 } else {
5782 os_atomic_or(&fp->fp_vflags, vflags, relaxed);
5783 }
5784 }
5785
5786 fileproc_vflags_t
5787 fileproc_get_vflags(struct fileproc *fp)
5788 {
5789 return os_atomic_load(&fp->fp_vflags, relaxed);
5790 }