1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108 #include <miscfs/fifofs/fifo.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114
115 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
116 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
117 vfs_context_t ctx);
118 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
119 vfs_context_t ctx);
120 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
121 vfs_context_t ctx);
122 static int vn_select( struct fileproc *fp, int which, void * wql,
123 vfs_context_t ctx);
124 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
125 vfs_context_t ctx);
126 static void filt_vndetach(struct knote *kn);
127 static int filt_vnode(struct knote *kn, long hint);
128 static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
129 #if 0
130 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
131 vfs_context_t ctx);
132 #endif
133
134 struct fileops vnops =
135 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
136
137 struct filterops vnode_filtops = {
138 .f_isfd = 1,
139 .f_attach = NULL,
140 .f_detach = filt_vndetach,
141 .f_event = filt_vnode
142 };
143
144 /*
145 * Common code for vnode open operations.
146 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
147 *
148 * XXX the profusion of interfaces here is probably a bad thing.
149 */
150 int
151 vn_open(struct nameidata *ndp, int fmode, int cmode)
152 {
153 return(vn_open_modflags(ndp, &fmode, cmode));
154 }
155
156 int
157 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
158 {
159 struct vnode_attr va;
160
161 VATTR_INIT(&va);
162 VATTR_SET(&va, va_mode, cmode);
163
164 return(vn_open_auth(ndp, fmodep, &va));
165 }
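/*
 * Illustrative sketch (not part of the original file): most in-kernel
 * consumers reach vn_open()/vn_close() through the exported
 * vnode_open()/vnode_close() KPI rather than by building a nameidata by
 * hand.  The flag choices below are assumptions for the example; only the
 * call shape is the point.
 *
 *	vnode_t vp = NULLVP;
 *	vfs_context_t ctx = vfs_context_current();
 *	int error;
 *
 *	error = vnode_open("/tmp/example", FREAD | FWRITE, 0600, 0, &vp, ctx);
 *	if (error == 0) {
 *		... I/O against vp, e.g. via vn_rdwr() ...
 *		(void)vnode_close(vp, FWRITE | FWASWRITTEN, ctx);
 *	}
 */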
166
167 static int
168 vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
169 {
170 int error;
171
172 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
173 goto bad;
174 }
175
176 /* call out to allow 3rd party notification of open.
177 * Ignore result of kauth_authorize_fileop call.
178 */
179 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
180 (uintptr_t)vp, 0);
181
182 return 0;
183
184 bad:
185 return error;
186
187 }
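/*
 * Illustrative sketch (assumption, not part of the original file): the
 * KAUTH_FILEOP_OPEN call-out above is what a third-party kauth listener
 * sees.  A kext would register roughly as follows; the argument layout
 * (arg0 = vnode, arg1 = path) is how kauth_authorize_fileop() is commonly
 * documented to notify listeners and should be verified against
 * sys/kauth.h.
 *
 *	static int
 *	my_fileop_cb(kauth_cred_t cred, void *idata, kauth_action_t action,
 *	    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
 *	{
 *		if (action == KAUTH_FILEOP_OPEN) {
 *			vnode_t vp = (vnode_t)arg0;		/* opened vnode */
 *			const char *path = (const char *)arg1;	/* its path */
 *			...
 *		}
 *		return KAUTH_RESULT_DEFER;	/* fileop scope is notify-only */
 *	}
 *
 *	kauth_listener_t l = kauth_listen_scope(KAUTH_SCOPE_FILEOP,
 *	    my_fileop_cb, NULL);
 *	...
 *	kauth_unlisten_scope(l);
 */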
188
189 /*
190 * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to
191 * determine whether that has happened.
192 */
193 static int
194 vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
195 {
196 uint32_t status = 0;
197 vnode_t dvp = ndp->ni_dvp;
198 int batched;
199 int error;
200 vnode_t vp;
201
202 batched = vnode_compound_open_available(ndp->ni_dvp);
203 *did_open = FALSE;
204
205 VATTR_SET(vap, va_type, VREG);
206 if (fmode & O_EXCL)
207 vap->va_vaflags |= VA_EXCLUSIVE;
208
209 #if NAMEDRSRCFORK
210 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
211 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
212 goto out;
213 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
214 goto out;
215 *did_create = TRUE;
216 } else {
217 #endif
218 if (!batched) {
219 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
220 goto out;
221 }
222
223 error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
224 if (error != 0) {
225 if (batched) {
226 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
227 } else {
228 *did_create = FALSE;
229 }
230
231 if (error == EKEEPLOOKING) {
232 if (*did_create) {
233 panic("EKEEPLOOKING, but we did a create?");
234 }
235 if (!batched) {
236 panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
237 }
238 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
239 panic("EKEEPLOOKING, but continue flag not set?");
240 }
241
242 /*
243 * Do NOT drop the dvp: we need everything to continue the lookup.
244 */
245 return error;
246 }
247 } else {
248 if (batched) {
249 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
250 *did_open = TRUE;
251 } else {
252 *did_create = TRUE;
253 }
254 }
255 #if NAMEDRSRCFORK
256 }
257 #endif
258
259 /*
260 * Unlock the fsnode (if locked) here so that we are free
261 * to drop the dvp iocount and prevent deadlock in build_path().
262 * nameidone() will still do the right thing later.
263 */
264 vp = ndp->ni_vp;
265 namei_unlock_fsnode(ndp);
266
267 if (*did_create) {
268 int update_flags = 0;
269
270 // Make sure the name & parent pointers are hooked up
271 if (vp->v_name == NULL)
272 update_flags |= VNODE_UPDATE_NAME;
273 if (vp->v_parent == NULLVP)
274 update_flags |= VNODE_UPDATE_PARENT;
275
276 if (update_flags)
277 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
278
279 vnode_put(dvp);
280 ndp->ni_dvp = NULLVP;
281
282 #if CONFIG_FSE
283 if (need_fsevent(FSE_CREATE_FILE, vp)) {
284 add_fsevent(FSE_CREATE_FILE, ctx,
285 FSE_ARG_VNODE, vp,
286 FSE_ARG_DONE);
287 }
288 #endif
289 }
290 out:
291 if (ndp->ni_dvp != NULLVP) {
292 vnode_put(dvp);
293 ndp->ni_dvp = NULLVP;
294 }
295
296 return error;
297 }
298
299 /*
300 * Open a file with authorization, updating the contents of the structures
301 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
302 * operation. This function is used for both opens of existing files, and
303 * creation of new files.
304 *
305 * Parameters: ndp The nameidata pointer describing the
306 * file
307 * fmodep A pointer to an int containing the mode
308 * information to be used for the open
309 * vap A pointer to the vnode attribute
310 * descriptor to be used for the open
311 *
312 * Indirect: * Contents of the data structures pointed
313 * to by the parameters are modified as
314 * necessary to the requested operation.
315 *
316 * Returns: 0 Success
317 * !0 errno value
318 *
319 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
320 *
321 * The contents of '*ndp' will be modified, based on the other
322 * arguments to this function, and to return file and directory
323 * data necessary to satisfy the requested operation.
324 *
325 * If the file does not exist and we are creating it, then the
326 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
327 * caller that the file was not truncated.
328 *
329 * If the file exists and the O_EXCL flag was not specified, then
330 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
331 * the caller that the existing file was merely opened rather
332 * than created.
333 *
334 * The contents of '*vap' will be modified as necessary to
335 * complete the operation, including setting of supported
336 * attributes, clearing of fields containing unsupported attributes
337 * in the request if the request proceeds without them, etc.
338 *
339 * XXX: This function is too complicated in acting on its arguments
340 *
341 * XXX: We should enumerate the possible errno values here, and where
342 * in the code they originated.
343 */
344 int
345 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
346 {
347 struct vnode *vp;
348 struct vnode *dvp;
349 vfs_context_t ctx = ndp->ni_cnd.cn_context;
350 int error;
351 int fmode;
352 uint32_t origcnflags;
353 boolean_t did_create;
354 boolean_t did_open;
355 boolean_t need_vnop_open;
356 boolean_t batched;
357 boolean_t ref_failed;
358
359 again:
360 vp = NULL;
361 dvp = NULL;
362 batched = FALSE;
363 did_create = FALSE;
364 need_vnop_open = TRUE;
365 ref_failed = FALSE;
366 fmode = *fmodep;
367 origcnflags = ndp->ni_cnd.cn_flags;
368
369 /*
370 * O_CREAT
371 */
372 if (fmode & O_CREAT) {
373 if ( (fmode & O_DIRECTORY) ) {
374 error = EINVAL;
375 goto out;
376 }
377 ndp->ni_cnd.cn_nameiop = CREATE;
378 #if CONFIG_TRIGGERS
379 ndp->ni_op = OP_LINK;
380 #endif
381 /* Inherit USEDVP, vnode_open() supported flags only */
382 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
383 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
384 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
385 #if NAMEDRSRCFORK
386 /* open calls are allowed for resource forks. */
387 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
388 #endif
389 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
390 ndp->ni_cnd.cn_flags |= FOLLOW;
391
392 continue_create_lookup:
393 if ( (error = namei(ndp)) )
394 goto out;
395
396 dvp = ndp->ni_dvp;
397 vp = ndp->ni_vp;
398
399 batched = vnode_compound_open_available(dvp);
400
401 /* not found, create */
402 if (vp == NULL) {
403 /* must have attributes for a new file */
404 if (vap == NULL) {
405 error = EINVAL;
406 goto out;
407 }
408 /*
409 * Attempt a create. For a system supporting compound VNOPs, we may
410 * find an existing file or create one; in either case, we will already
411 * have the file open and no VNOP_OPEN() will be needed.
412 */
413 error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
414
415 dvp = ndp->ni_dvp;
416 vp = ndp->ni_vp;
417
418 /*
419 * Detected a node that the filesystem couldn't handle. Don't call
420 * nameidone() yet, because we need that path buffer.
421 */
422 if (error == EKEEPLOOKING) {
423 if (!batched) {
424 panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
425 }
426 goto continue_create_lookup;
427 }
428
429 nameidone(ndp);
430 if (dvp) {
431 panic("Shouldn't have a dvp here.");
432 }
433
434 if (error) {
435 /*
436 * Check for a creation or unlink race.
437 */
438 if (((error == EEXIST) && !(fmode & O_EXCL)) ||
439 ((error == ENOENT) && (fmode & O_CREAT))){
440 if (vp)
441 vnode_put(vp);
442 goto again;
443 }
444 goto bad;
445 }
446
447 need_vnop_open = !did_open;
448 } else {
449 if (fmode & O_EXCL)
450 error = EEXIST;
451
452 /*
453 * We have a vnode. Use compound open if available
454 * or else fall through to "traditional" path. Note: can't
455 * do a compound open for root, because the parent belongs
456 * to a different FS.
457 */
458 if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
459 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
460
461 if (error == 0) {
462 vp = ndp->ni_vp;
463 need_vnop_open = FALSE;
464 } else if (error == EKEEPLOOKING) {
465 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
466 panic("EKEEPLOOKING, but continue flag not set?");
467 }
468 goto continue_create_lookup;
469 }
470 }
471 nameidone(ndp);
472 vnode_put(dvp);
473 ndp->ni_dvp = NULLVP;
474
475 if (error) {
476 goto bad;
477 }
478
479 fmode &= ~O_CREAT;
480
481 /* Fall through */
482 }
483 } else {
484 /*
485 * Not O_CREAT
486 */
487 ndp->ni_cnd.cn_nameiop = LOOKUP;
488 /* Inherit USEDVP, vnode_open() supported flags only */
489 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
490 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
491 #if NAMEDRSRCFORK
492 /* open calls are allowed for resource forks. */
493 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
494 #endif
495 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
496
497 /* preserve NOFOLLOW from vnode_open() */
498 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
499 ndp->ni_cnd.cn_flags &= ~FOLLOW;
500 }
501
502 /* Do a lookup, possibly going directly to filesystem for compound operation */
503 do {
504 if ( (error = namei(ndp)) )
505 goto out;
506 vp = ndp->ni_vp;
507 dvp = ndp->ni_dvp;
508
509 /* Check for batched lookup-open */
510 batched = vnode_compound_open_available(dvp);
511 if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
512 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
513 vp = ndp->ni_vp;
514 if (error == 0) {
515 need_vnop_open = FALSE;
516 } else if (error == EKEEPLOOKING) {
517 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
518 panic("EKEEPLOOKING, but continue flag not set?");
519 }
520 }
521 }
522 } while (error == EKEEPLOOKING);
523
524 nameidone(ndp);
525 vnode_put(dvp);
526 ndp->ni_dvp = NULLVP;
527
528 if (error) {
529 goto bad;
530 }
531 }
532
533 /*
534 * By this point, nameidone() is called, dvp iocount is dropped,
535 * and dvp pointer is cleared.
536 */
537 if (ndp->ni_dvp != NULLVP) {
538 panic("Haven't cleaned up adequately in vn_open_auth()");
539 }
540
541 /*
542 * Expect to use this code for filesystems without compound VNOPs, for the root
543 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
544 * and for shadow files, which do not live on the same filesystems as their "parents."
545 */
546 if (need_vnop_open) {
547 if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
548 panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
549 }
550
551 if (!did_create) {
552 error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
553 if (error) {
554 goto bad;
555 }
556 }
557
558 error = VNOP_OPEN(vp, fmode, ctx);
559 if (error) {
560 goto bad;
561 }
562 need_vnop_open = FALSE;
563 }
564
565 // if the vnode is tagged VOPENEVT and the current process
566 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
567 // flag to the open mode so that this open won't count against
568 // the vnode when carbon delete() does a vnode_isinuse() to see
569 // if a file is currently in use. this allows spotlight
570 // importers to not interfere with carbon apps that depend on
571 // the no-delete-if-busy semantics of carbon delete().
572 //
573 if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
574 fmode |= O_EVTONLY;
575 }
576
577 /*
578 * Grab reference, etc.
579 */
580 error = vn_open_auth_finish(vp, fmode, ctx);
581 if (error) {
582 ref_failed = TRUE;
583 goto bad;
584 }
585
586 /* Compound VNOP open is responsible for doing the truncate */
587 if (batched || did_create)
588 fmode &= ~O_TRUNC;
589
590 *fmodep = fmode;
591 return (0);
592
593 bad:
594 /* Opened either explicitly or by a batched create */
595 if (!need_vnop_open) {
596 VNOP_CLOSE(vp, fmode, ctx);
597 }
598
599 ndp->ni_vp = NULL;
600 if (vp) {
601 #if NAMEDRSRCFORK
602 /* Aggressively recycle shadow files if we error'd out during open() */
603 if ((vnode_isnamedstream(vp)) &&
604 (vp->v_parent != NULLVP) &&
605 (vnode_isshadow(vp))) {
606 vnode_recycle(vp);
607 }
608 #endif
609 vnode_put(vp);
610 /*
611 * Check for a race against unlink. We had a vnode
612 * but according to vnode_authorize or VNOP_OPEN it
613 * no longer exists.
614 *
615 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
616 */
617 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
618 goto again;
619 }
620 }
621
622 out:
623 return (error);
624 }
625
626 #if vn_access_DEPRECATED
627 /*
628 * Authorize an action against a vnode. This has been the canonical way to
629 * ensure that the credential/process/etc. referenced by a vfs_context
630 * is granted the rights called out in 'mode' against the vnode 'vp'.
631 *
632 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
633 * to add support for more rights. As such, this interface will be deprecated
634 * and callers will use vnode_authorize instead.
635 */
636 int
637 vn_access(vnode_t vp, int mode, vfs_context_t context)
638 {
639 kauth_action_t action;
640
641 action = 0;
642 if (mode & VREAD)
643 action |= KAUTH_VNODE_READ_DATA;
644 if (mode & VWRITE)
645 action |= KAUTH_VNODE_WRITE_DATA;
646 if (mode & VEXEC)
647 action |= KAUTH_VNODE_EXECUTE;
648
649 return(vnode_authorize(vp, NULL, action, context));
650 }
651 #endif /* vn_access_DEPRECATED */
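/*
 * Illustrative sketch (not part of the original file): the replacement the
 * deprecation note above points to.  Instead of vn_access(vp, VREAD | VWRITE,
 * ctx), new callers spell the rights out directly:
 *
 *	error = vnode_authorize(vp, NULL,
 *	    KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx);
 */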
652
653 /*
654 * Vnode close call
655 */
656 int
657 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
658 {
659 int error;
660
661 #if NAMEDRSRCFORK
662 /* Sync data from resource fork shadow file if needed. */
663 if ((vp->v_flag & VISNAMEDSTREAM) &&
664 (vp->v_parent != NULLVP) &&
665 vnode_isshadow(vp)) {
666 if (flags & FWASWRITTEN) {
667 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
668 }
669 }
670 #endif
671
672 /* work around for foxhound */
673 if (vnode_isspec(vp))
674 (void)vnode_rele_ext(vp, flags, 0);
675
676 error = VNOP_CLOSE(vp, flags, ctx);
677
678 #if CONFIG_FSE
679 if (flags & FWASWRITTEN) {
680 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
681 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
682 FSE_ARG_VNODE, vp,
683 FSE_ARG_DONE);
684 }
685 }
686 #endif
687
688 if (!vnode_isspec(vp))
689 (void)vnode_rele_ext(vp, flags, 0);
690
691 return (error);
692 }
693
694 static int
695 vn_read_swapfile(
696 struct vnode *vp,
697 uio_t uio)
698 {
699 int error;
700 off_t swap_count, this_count;
701 off_t file_end, read_end;
702 off_t prev_resid;
703 char *my_swap_page;
704
705 /*
706 * Reading from a swap file will get you zeroes.
707 */
708
709 my_swap_page = NULL;
710 error = 0;
711 swap_count = uio_resid(uio);
712
713 file_end = ubc_getsize(vp);
714 read_end = uio->uio_offset + uio_resid(uio);
715 if (uio->uio_offset >= file_end) {
716 /* uio starts after end of file: nothing to read */
717 swap_count = 0;
718 } else if (read_end > file_end) {
719 /* uio extends beyond end of file: stop before that */
720 swap_count -= (read_end - file_end);
721 }
722
723 while (swap_count > 0) {
724 if (my_swap_page == NULL) {
725 MALLOC(my_swap_page, char *, PAGE_SIZE,
726 M_TEMP, M_WAITOK);
727 memset(my_swap_page, '\0', PAGE_SIZE);
728 /* add an end-of-line to keep line counters happy */
729 my_swap_page[PAGE_SIZE-1] = '\n';
730 }
731 this_count = swap_count;
732 if (this_count > PAGE_SIZE) {
733 this_count = PAGE_SIZE;
734 }
735
736 prev_resid = uio_resid(uio);
737 error = uiomove((caddr_t) my_swap_page,
738 this_count,
739 uio);
740 if (error) {
741 break;
742 }
743 swap_count -= (prev_resid - uio_resid(uio));
744 }
745 if (my_swap_page != NULL) {
746 FREE(my_swap_page, M_TEMP);
747 my_swap_page = NULL;
748 }
749
750 return error;
751 }
752 /*
753 * Package up an I/O request on a vnode into a uio and do it.
754 */
755 int
756 vn_rdwr(
757 enum uio_rw rw,
758 struct vnode *vp,
759 caddr_t base,
760 int len,
761 off_t offset,
762 enum uio_seg segflg,
763 int ioflg,
764 kauth_cred_t cred,
765 int *aresid,
766 proc_t p)
767 {
768 int64_t resid;
769 int result;
770
771 result = vn_rdwr_64(rw,
772 vp,
773 (uint64_t)(uintptr_t)base,
774 (int64_t)len,
775 offset,
776 segflg,
777 ioflg,
778 cred,
779 &resid,
780 p);
781
782 /* "resid" should be bounded above by "len," which is an int */
783 if (aresid != NULL) {
784 *aresid = resid;
785 }
786
787 return result;
788 }
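/*
 * Illustrative sketch (assumption, not part of the original file): reading
 * the first 512 bytes of an already-referenced vnode into a kernel buffer
 * with vn_rdwr().  The ioflg and credential choices are examples only.
 *
 *	char buf[512];
 *	int resid = 0;
 *	int error;
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf),
 *	    0,			/* offset */
 *	    UIO_SYSSPACE,	/* buf is a kernel address */
 *	    IO_NOCACHE,
 *	    vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
 *	if (error == 0) {
 *		... sizeof(buf) - resid bytes are now valid in buf ...
 *	}
 */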
789
790
791 int
792 vn_rdwr_64(
793 enum uio_rw rw,
794 struct vnode *vp,
795 uint64_t base,
796 int64_t len,
797 off_t offset,
798 enum uio_seg segflg,
799 int ioflg,
800 kauth_cred_t cred,
801 int64_t *aresid,
802 proc_t p)
803 {
804 uio_t auio;
805 int spacetype;
806 struct vfs_context context;
807 int error=0;
808 char uio_buf[ UIO_SIZEOF(1) ];
809
810 context.vc_thread = current_thread();
811 context.vc_ucred = cred;
812
813 if (UIO_SEG_IS_USER_SPACE(segflg)) {
814 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
815 }
816 else {
817 spacetype = UIO_SYSSPACE;
818 }
819 auio = uio_createwithbuffer(1, offset, spacetype, rw,
820 &uio_buf[0], sizeof(uio_buf));
821 uio_addiov(auio, base, len);
822
823 #if CONFIG_MACF
824 /* XXXMAC
825 * IO_NOAUTH should be re-examined.
826 * Likely that mediation should be performed in caller.
827 */
828 if ((ioflg & IO_NOAUTH) == 0) {
829 /* passed cred is fp->f_cred */
830 if (rw == UIO_READ)
831 error = mac_vnode_check_read(&context, cred, vp);
832 else
833 error = mac_vnode_check_write(&context, cred, vp);
834 }
835 #endif
836
837 if (error == 0) {
838 if (rw == UIO_READ) {
839 if (vnode_isswap(vp)) {
840 error = vn_read_swapfile(vp, auio);
841 } else {
842 error = VNOP_READ(vp, auio, ioflg, &context);
843 }
844 } else {
845 error = VNOP_WRITE(vp, auio, ioflg, &context);
846 }
847 }
848
849 if (aresid)
850 *aresid = uio_resid(auio);
851 else
852 if (uio_resid(auio) && error == 0)
853 error = EIO;
854 return (error);
855 }
856
857 /*
858 * File table vnode read routine.
859 */
860 static int
861 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
862 {
863 struct vnode *vp;
864 int error, ioflag;
865 off_t count;
866
867 vp = (struct vnode *)fp->f_fglob->fg_data;
868 if ( (error = vnode_getwithref(vp)) ) {
869 return(error);
870 }
871
872 #if CONFIG_MACF
873 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
874 if (error) {
875 (void)vnode_put(vp);
876 return (error);
877 }
878 #endif
879
880 ioflag = 0;
881 if (fp->f_fglob->fg_flag & FNONBLOCK)
882 ioflag |= IO_NDELAY;
883 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
884 ioflag |= IO_NOCACHE;
885 if (fp->f_fglob->fg_flag & FNORDAHEAD)
886 ioflag |= IO_RAOFF;
887
888 if ((flags & FOF_OFFSET) == 0)
889 uio->uio_offset = fp->f_fglob->fg_offset;
890 count = uio_resid(uio);
891
892 if (vnode_isswap(vp)) {
893 /* special case for swap files */
894 error = vn_read_swapfile(vp, uio);
895 } else {
896 error = VNOP_READ(vp, uio, ioflag, ctx);
897 }
898 if ((flags & FOF_OFFSET) == 0)
899 fp->f_fglob->fg_offset += count - uio_resid(uio);
900
901 (void)vnode_put(vp);
902 return (error);
903 }
904
905
906 /*
907 * File table vnode write routine.
908 */
909 static int
910 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
911 {
912 struct vnode *vp;
913 int error, ioflag;
914 off_t count;
915 int clippedsize = 0;
916 int partialwrite=0;
917 int residcount, oldcount;
918 proc_t p = vfs_context_proc(ctx);
919
920 count = 0;
921 vp = (struct vnode *)fp->f_fglob->fg_data;
922 if ( (error = vnode_getwithref(vp)) ) {
923 return(error);
924 }
925
926 #if CONFIG_MACF
927 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
928 if (error) {
929 (void)vnode_put(vp);
930 return (error);
931 }
932 #endif
933
934 ioflag = IO_UNIT;
935 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
936 ioflag |= IO_APPEND;
937 if (fp->f_fglob->fg_flag & FNONBLOCK)
938 ioflag |= IO_NDELAY;
939 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
940 ioflag |= IO_NOCACHE;
941 if (fp->f_fglob->fg_flag & FNODIRECT)
942 ioflag |= IO_NODIRECT;
943
944 /*
945 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
946 *
947 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
948 * XXX the non-essential metadata without some additional VFS work;
949 * XXX the intent at this point is to plumb the interface for it.
950 */
951 if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
952 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
953 ioflag |= IO_SYNC;
954 }
955
956 if ((flags & FOF_OFFSET) == 0) {
957 uio->uio_offset = fp->f_fglob->fg_offset;
958 count = uio_resid(uio);
959 }
960 if (((flags & FOF_OFFSET) == 0) &&
961 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
962 (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
963 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
964 /*
965 * If the requested residual would cause us to go past the
966 * administrative limit, then we need to adjust the residual
967 * down to cause fewer bytes than requested to be written. If
968 * we can't do that (e.g. the residual is already 1 byte),
969 * then we fail the write with EFBIG.
970 */
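/*
 * Worked example (illustrative numbers only): with rlim_cur = 1MB,
 * uio_offset = 1MB - 100 and uio_resid = 300, the request overshoots
 * the limit by clippedsize = 200.  Since 200 < 300, the resid is
 * trimmed to 100 and a partial write proceeds; only if clippedsize
 * were >= the resid would SIGXFSZ be posted and EFBIG returned.
 */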
971 residcount = uio_resid(uio);
972 if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
973 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
974 } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
975 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
976 }
977 if (clippedsize >= residcount) {
978 psignal(p, SIGXFSZ);
979 vnode_put(vp);
980 return (EFBIG);
981 }
982 partialwrite = 1;
983 uio_setresid(uio, residcount-clippedsize);
984 }
985 if ((flags & FOF_OFFSET) != 0) {
986 /* for pwrite, append should be ignored */
987 ioflag &= ~IO_APPEND;
988 if (p && (vp->v_type == VREG) &&
989 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
990 psignal(p, SIGXFSZ);
991 vnode_put(vp);
992 return (EFBIG);
993 }
994 if (p && (vp->v_type == VREG) &&
995 ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
996 //Debugger("vn_bwrite:overstepping the bounds");
997 residcount = uio_resid(uio);
998 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
999 partialwrite = 1;
1000 uio_setresid(uio, residcount-clippedsize);
1001 }
1002 }
1003
1004 error = VNOP_WRITE(vp, uio, ioflag, ctx);
1005
1006 if (partialwrite) {
1007 oldcount = uio_resid(uio);
1008 uio_setresid(uio, oldcount + clippedsize);
1009 }
1010
1011 if ((flags & FOF_OFFSET) == 0) {
1012 if (ioflag & IO_APPEND)
1013 fp->f_fglob->fg_offset = uio->uio_offset;
1014 else
1015 fp->f_fglob->fg_offset += count - uio_resid(uio);
1016 }
1017
1018 /*
1019 * Set the credentials on successful writes
1020 */
1021 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
1022 /*
1023 * When called from aio subsystem, we only have the proc from
1024 * which to get the credential, at this point, so use that
1025 * instead. This means aio functions are incompatible with
1026 * per-thread credentials (aio operations are proxied). We
1027 * can't easily correct the aio vs. settid race in this case
1028 * anyway, so we disallow it.
1029 */
1030 if ((flags & FOF_PCRED) == 0) {
1031 ubc_setthreadcred(vp, p, current_thread());
1032 } else {
1033 ubc_setcred(vp, p);
1034 }
1035 }
1036 (void)vnode_put(vp);
1037 return (error);
1038 }
1039
1040 /*
1041 * File table vnode stat routine.
1042 *
1043 * Returns: 0 Success
1044 * EBADF
1045 * ENOMEM
1046 * vnode_getattr:???
1047 */
1048 int
1049 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1050 {
1051 struct vnode_attr va;
1052 int error;
1053 u_short mode;
1054 kauth_filesec_t fsec;
1055 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
1056 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
1057
1058 if (isstat64 != 0)
1059 sb64 = (struct stat64 *)sbptr;
1060 else
1061 sb = (struct stat *)sbptr;
1062 memset(&va, 0, sizeof(va));
1063 VATTR_INIT(&va);
1064 VATTR_WANTED(&va, va_fsid);
1065 VATTR_WANTED(&va, va_fileid);
1066 VATTR_WANTED(&va, va_mode);
1067 VATTR_WANTED(&va, va_type);
1068 VATTR_WANTED(&va, va_nlink);
1069 VATTR_WANTED(&va, va_uid);
1070 VATTR_WANTED(&va, va_gid);
1071 VATTR_WANTED(&va, va_rdev);
1072 VATTR_WANTED(&va, va_data_size);
1073 VATTR_WANTED(&va, va_access_time);
1074 VATTR_WANTED(&va, va_modify_time);
1075 VATTR_WANTED(&va, va_change_time);
1076 VATTR_WANTED(&va, va_create_time);
1077 VATTR_WANTED(&va, va_flags);
1078 VATTR_WANTED(&va, va_gen);
1079 VATTR_WANTED(&va, va_iosize);
1080 /* lower layers will synthesise va_total_alloc from va_data_size if required */
1081 VATTR_WANTED(&va, va_total_alloc);
1082 if (xsec != NULL) {
1083 VATTR_WANTED(&va, va_uuuid);
1084 VATTR_WANTED(&va, va_guuid);
1085 VATTR_WANTED(&va, va_acl);
1086 }
1087 error = vnode_getattr(vp, &va, ctx);
1088 if (error)
1089 goto out;
1090 /*
1091 * Copy from vattr table
1092 */
1093 if (isstat64 != 0) {
1094 sb64->st_dev = va.va_fsid;
1095 sb64->st_ino = (ino64_t)va.va_fileid;
1096
1097 } else {
1098 sb->st_dev = va.va_fsid;
1099 sb->st_ino = (ino_t)va.va_fileid;
1100 }
1101 mode = va.va_mode;
1102 switch (vp->v_type) {
1103 case VREG:
1104 mode |= S_IFREG;
1105 break;
1106 case VDIR:
1107 mode |= S_IFDIR;
1108 break;
1109 case VBLK:
1110 mode |= S_IFBLK;
1111 break;
1112 case VCHR:
1113 mode |= S_IFCHR;
1114 break;
1115 case VLNK:
1116 mode |= S_IFLNK;
1117 break;
1118 case VSOCK:
1119 mode |= S_IFSOCK;
1120 break;
1121 case VFIFO:
1122 mode |= S_IFIFO;
1123 break;
1124 default:
1125 error = EBADF;
1126 goto out;
1127 };
1128 if (isstat64 != 0) {
1129 sb64->st_mode = mode;
1130 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1131 sb64->st_uid = va.va_uid;
1132 sb64->st_gid = va.va_gid;
1133 sb64->st_rdev = va.va_rdev;
1134 sb64->st_size = va.va_data_size;
1135 sb64->st_atimespec = va.va_access_time;
1136 sb64->st_mtimespec = va.va_modify_time;
1137 sb64->st_ctimespec = va.va_change_time;
1138 sb64->st_birthtimespec =
1139 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
1140 sb64->st_blksize = va.va_iosize;
1141 sb64->st_flags = va.va_flags;
1142 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1143 } else {
1144 sb->st_mode = mode;
1145 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1146 sb->st_uid = va.va_uid;
1147 sb->st_gid = va.va_gid;
1148 sb->st_rdev = va.va_rdev;
1149 sb->st_size = va.va_data_size;
1150 sb->st_atimespec = va.va_access_time;
1151 sb->st_mtimespec = va.va_modify_time;
1152 sb->st_ctimespec = va.va_change_time;
1153 sb->st_blksize = va.va_iosize;
1154 sb->st_flags = va.va_flags;
1155 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1156 }
1157
1158 /* if we're interested in extended security data and we got an ACL */
1159 if (xsec != NULL) {
1160 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
1161 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
1162 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
1163 *xsec = KAUTH_FILESEC_NONE;
1164 } else {
1165
1166 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1167 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
1168 } else {
1169 fsec = kauth_filesec_alloc(0);
1170 }
1171 if (fsec == NULL) {
1172 error = ENOMEM;
1173 goto out;
1174 }
1175 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
1176 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
1177 fsec->fsec_owner = va.va_uuuid;
1178 } else {
1179 fsec->fsec_owner = kauth_null_guid;
1180 }
1181 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
1182 fsec->fsec_group = va.va_guuid;
1183 } else {
1184 fsec->fsec_group = kauth_null_guid;
1185 }
1186 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1187 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1188 } else {
1189 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1190 }
1191 *xsec = fsec;
1192 }
1193 }
1194
1195 /* Do not give the generation number out to unprivileged users */
1196 if (va.va_gen && !vfs_context_issuser(ctx)) {
1197 if (isstat64 != 0)
1198 sb64->st_gen = 0;
1199 else
1200 sb->st_gen = 0;
1201 } else {
1202 if (isstat64 != 0)
1203 sb64->st_gen = va.va_gen;
1204 else
1205 sb->st_gen = va.va_gen;
1206 }
1207
1208 error = 0;
1209 out:
1210 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1211 kauth_acl_free(va.va_acl);
1212 return (error);
1213 }
1214
1215 int
1216 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1217 {
1218 int error;
1219
1220 #if CONFIG_MACF
1221 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1222 if (error)
1223 return (error);
1224 #endif
1225
1226 /* authorize */
1227 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1228 return(error);
1229
1230 /* actual stat */
1231 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1232 }
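/*
 * Illustrative sketch (not part of the original file): fetching 64-bit
 * stat data without the extended security blob.  Passing a NULL xsec
 * skips the ACL/UUID work in vn_stat_noauth() above.
 *
 *	struct stat64 sb64;
 *	int error;
 *
 *	error = vn_stat(vp, &sb64, NULL, 1, ctx);
 */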
1233
1234
1235 /*
1236 * File table vnode ioctl routine.
1237 */
1238 static int
1239 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1240 {
1241 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1242 off_t file_size;
1243 int error;
1244 struct vnode *ttyvp;
1245 int funnel_state;
1246 struct session * sessp;
1247
1248 if ( (error = vnode_getwithref(vp)) ) {
1249 return(error);
1250 }
1251
1252 #if CONFIG_MACF
1253 error = mac_vnode_check_ioctl(ctx, vp, com);
1254 if (error)
1255 goto out;
1256 #endif
1257
1258 switch (vp->v_type) {
1259 case VREG:
1260 case VDIR:
1261 if (com == FIONREAD) {
1262 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1263 goto out;
1264 *(int *)data = file_size - fp->f_fglob->fg_offset;
1265 goto out;
1266 }
1267 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1268 goto out;
1269 }
1270 /* fall into ... */
1271
1272 default:
1273 error = ENOTTY;
1274 goto out;
1275
1276 case VFIFO:
1277 case VCHR:
1278 case VBLK:
1279
1280 /* Should not be able to set block size from user space */
1281 if (com == DKIOCSETBLOCKSIZE) {
1282 error = EPERM;
1283 goto out;
1284 }
1285
1286 if (com == FIODTYPE) {
1287 if (vp->v_type == VBLK) {
1288 if (major(vp->v_rdev) >= nblkdev) {
1289 error = ENXIO;
1290 goto out;
1291 }
1292 *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
1293
1294 } else if (vp->v_type == VCHR) {
1295 if (major(vp->v_rdev) >= nchrdev) {
1296 error = ENXIO;
1297 goto out;
1298 }
1299 *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
1300 } else {
1301 error = ENOTTY;
1302 goto out;
1303 }
1304 goto out;
1305 }
1306 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1307
1308 if (error == 0 && com == TIOCSCTTY) {
1309 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
1310 if (error != 0) {
1311 panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
1312 }
1313
1314 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1315 sessp = proc_session(vfs_context_proc(ctx));
1316
1317 session_lock(sessp);
1318 ttyvp = sessp->s_ttyvp;
1319 sessp->s_ttyvp = vp;
1320 sessp->s_ttyvid = vnode_vid(vp);
1321 session_unlock(sessp);
1322 session_rele(sessp);
1323 thread_funnel_set(kernel_flock, funnel_state);
1324
1325 if (ttyvp)
1326 vnode_rele(ttyvp);
1327 }
1328 }
1329 out:
1330 (void)vnode_put(vp);
1331 return(error);
1332 }
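/*
 * Illustrative sketch (assumption, not part of the original file): the
 * FIONREAD case above is what backs the familiar user-space idiom
 *
 *	int nread;
 *	if (ioctl(fd, FIONREAD, &nread) == 0)
 *		... nread == file size minus the descriptor's offset ...
 *
 * for regular files and directories; most other commands fall through
 * to VNOP_IOCTL().
 */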
1333
1334 /*
1335 * File table vnode select routine.
1336 */
1337 static int
1338 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1339 {
1340 int error;
1341 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1342 struct vfs_context context;
1343
1344 if ( (error = vnode_getwithref(vp)) == 0 ) {
1345 context.vc_thread = current_thread();
1346 context.vc_ucred = fp->f_fglob->fg_cred;
1347
1348 #if CONFIG_MACF
1349 /*
1350 * XXX We should use a per thread credential here; minimally,
1351 * XXX the process credential should have a persistent
1352 * XXX reference on it before being passed in here.
1353 */
1354 error = mac_vnode_check_select(ctx, vp, which);
1355 if (error == 0)
1356 #endif
1357 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1358
1359 (void)vnode_put(vp);
1360 }
1361 return(error);
1362
1363 }
1364
1365 /*
1366 * File table vnode close routine.
1367 */
1368 static int
1369 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1370 {
1371 struct vnode *vp = (struct vnode *)fg->fg_data;
1372 int error;
1373 struct flock lf;
1374
1375 if ( (error = vnode_getwithref(vp)) == 0 ) {
1376
1377 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1378 lf.l_whence = SEEK_SET;
1379 lf.l_start = 0;
1380 lf.l_len = 0;
1381 lf.l_type = F_UNLCK;
1382
1383 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1384 }
1385 error = vn_close(vp, fg->fg_flag, ctx);
1386
1387 (void)vnode_put(vp);
1388 }
1389 return(error);
1390 }
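/*
 * Illustrative sketch (assumption, not part of the original file): the
 * FHASLOCK path above is what releases an advisory lock taken with
 * flock(2) when the last reference to the open file goes away, e.g.
 *
 *	int fd = open("/tmp/lockfile", O_RDWR);	/* hypothetical path */
 *	flock(fd, LOCK_EX);	/* marks the fileglob FHASLOCK */
 *	close(fd);		/* last close reaches vn_closefile(),
 *				   which issues the F_UNLCK above */
 */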
1391
1392 /*
1393 * Returns: 0 Success
1394 * VNOP_PATHCONF:???
1395 */
1396 int
1397 vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
1398 {
1399 int error = 0;
1400 struct vfs_attr vfa;
1401
1402 switch(name) {
1403 case _PC_EXTENDED_SECURITY_NP:
1404 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1405 break;
1406 case _PC_AUTH_OPAQUE_NP:
1407 *retval = vfs_authopaque(vnode_mount(vp));
1408 break;
1409 case _PC_2_SYMLINKS:
1410 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1411 break;
1412 case _PC_ALLOC_SIZE_MIN:
1413 *retval = 1; /* XXX lie: 1 byte */
1414 break;
1415 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
1416 *retval = 1; /* [AIO] option is supported */
1417 break;
1418 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1419 *retval = 0; /* [PIO] option is not supported */
1420 break;
1421 case _PC_REC_INCR_XFER_SIZE:
1422 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1423 break;
1424 case _PC_REC_MIN_XFER_SIZE:
1425 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1426 break;
1427 case _PC_REC_MAX_XFER_SIZE:
1428 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1429 break;
1430 case _PC_REC_XFER_ALIGN:
1431 *retval = 4096; /* XXX recommend page aligned buffers */
1432 break;
1433 case _PC_SYMLINK_MAX:
1434 *retval = 255; /* Minimum acceptable POSIX value */
1435 break;
1436 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1437 *retval = 0; /* [SIO] option is not supported */
1438 break;
1439 case _PC_XATTR_SIZE_BITS:
1440 /* The number of bits used to store maximum extended
1441 * attribute size in bytes. For example, if the maximum
1442 * attribute size supported by a file system is 128K, the
1443 * value returned will be 18. However, a value of 18 only means
1444 * that the maximum attribute size can be anywhere from
1445 * 128KB to (256KB - 1). As a special case, the resource
1446 * fork can have much larger size, and some file system
1447 * specific extended attributes can have smaller and preset
1448 * size; for example, Finder Info is always 32 bytes.
1449 */
1450 memset(&vfa, 0, sizeof(vfa));
1451 VFSATTR_INIT(&vfa);
1452 VFSATTR_WANTED(&vfa, f_capabilities);
1453 if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
1454 (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
1455 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1456 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1457 /* Supports native extended attributes */
1458 error = VNOP_PATHCONF(vp, name, retval, ctx);
1459 } else {
1460 /* Number of bits used to represent the maximum size of
1461 * extended attribute stored in an Apple Double file.
1462 */
1463 *retval = AD_XATTR_SIZE_BITS;
1464 }
1465 break;
1466 default:
1467 error = VNOP_PATHCONF(vp, name, retval, ctx);
1468 break;
1469 }
1470
1471 return (error);
1472 }
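/*
 * Illustrative sketch (assumption, not part of the original file): the
 * _PC_XATTR_SIZE_BITS case above surfaces through pathconf(2), assuming
 * the _PC_ name is exported to user space unchanged:
 *
 *	long bits = pathconf("/Volumes/SomeVolume", _PC_XATTR_SIZE_BITS);
 *
 * A volume with native extended attributes answers via VNOP_PATHCONF();
 * everything else reports AD_XATTR_SIZE_BITS (the AppleDouble limit).
 */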
1473
1474 static int
1475 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1476 {
1477 int error;
1478 struct vnode *vp;
1479
1480 vp = (struct vnode *)fp->f_fglob->fg_data;
1481
1482 /*
1483 * Don't attach a knote to a dead vnode.
1484 */
1485 if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
1486 switch (kn->kn_filter) {
1487 case EVFILT_READ:
1488 case EVFILT_WRITE:
1489 if (vnode_isfifo(vp)) {
1490 /* We'll only watch FIFOs that use our fifofs */
1491 if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
1492 error = ENOTSUP;
1493 }
1494
1495 } else if (!vnode_isreg(vp)) {
1496 if (vnode_ischr(vp) &&
1497 (error = spec_kqfilter(vp, kn)) == 0) {
1498 /* claimed by a special device */
1499 vnode_put(vp);
1500 return 0;
1501 }
1502
1503 error = EINVAL;
1504 }
1505 break;
1506 case EVFILT_VNODE:
1507 break;
1508 default:
1509 error = EINVAL;
1510 }
1511
1512 if (error) {
1513 vnode_put(vp);
1514 return error;
1515 }
1516
1517 #if CONFIG_MACF
1518 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1519 if (error) {
1520 vnode_put(vp);
1521 return error;
1522 }
1523 #endif
1524
1525 kn->kn_hook = (void*)vp;
1526 kn->kn_hookid = vnode_vid(vp);
1527 kn->kn_fop = &vnode_filtops;
1528
1529 vnode_lock(vp);
1530 KNOTE_ATTACH(&vp->v_knotes, kn);
1531 vnode_unlock(vp);
1532
1533 /* Ask the filesystem to provide remove notifications, but ignore failure */
1534 VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);
1535
1536 vnode_put(vp);
1537 }
1538
1539 return (error);
1540 }
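/*
 * Illustrative sketch (assumption, not part of the original file): the
 * EVFILT_VNODE attach path above is driven by the usual kqueue idiom in
 * user space, e.g. watching a file for writes or deletion:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	int fd = open("/tmp/watched", O_EVTONLY);	/* hypothetical path */
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
 *	    NOTE_DELETE | NOTE_WRITE | NOTE_REVOKE, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	... a later kevent(kq, NULL, 0, &kev, 1, NULL) reports the events,
 *	    which are raised here via filt_vnode() below ...
 */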
1541
1542 static void
1543 filt_vndetach(struct knote *kn)
1544 {
1545 vfs_context_t ctx = vfs_context_current();
1546 struct vnode *vp;
1547 vp = (struct vnode *)kn->kn_hook;
1548 if (vnode_getwithvid(vp, kn->kn_hookid))
1549 return;
1550
1551 vnode_lock(vp);
1552 KNOTE_DETACH(&vp->v_knotes, kn);
1553 vnode_unlock(vp);
1554
1555 /*
1556 * Tell a (generally networked) filesystem that we're no longer watching.
1557 * If the FS wants to track contexts, it should still be using the one from
1558 * the VNODE_MONITOR_BEGIN.
1559 */
1560 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
1561 vnode_put(vp);
1562 }
1563
1564
1565 /*
1566 * Used for EVFILT_READ
1567 *
1568 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case
1569 * differently than the regular case for VREG files. If not in poll(),
1570 * then we need to know current fileproc offset for VREG.
1571 */
1572 static intptr_t
1573 vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
1574 {
1575 if (vnode_isfifo(vp)) {
1576 int cnt;
1577 int err = fifo_charcount(vp, &cnt);
1578 if (err == 0) {
1579 return (intptr_t)cnt;
1580 } else {
1581 return (intptr_t)0;
1582 }
1583 } else if (vnode_isreg(vp)) {
1584 if (ispoll) {
1585 return (intptr_t)1;
1586 }
1587
1588 off_t amount;
1589 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
1590 if (amount > (off_t)INTPTR_MAX) {
1591 return INTPTR_MAX;
1592 } else if (amount < (off_t)INTPTR_MIN) {
1593 return INTPTR_MIN;
1594 } else {
1595 return (intptr_t)amount;
1596 }
1597 } else {
1598 panic("Should never have an EVFILT_READ except for reg or fifo.");
1599 return 0;
1600 }
1601 }
1602
1603 /*
1604 * Used for EVFILT_WRITE.
1605 *
1606 * For regular vnodes, we can always write (1). For named pipes,
1607 * see how much space there is in the buffer. Nothing else is covered.
1608 */
1609 static intptr_t
1610 vnode_writable_space_count(vnode_t vp)
1611 {
1612 if (vnode_isfifo(vp)) {
1613 long spc;
1614 int err = fifo_freespace(vp, &spc);
1615 if (err == 0) {
1616 return (intptr_t)spc;
1617 } else {
1618 return (intptr_t)0;
1619 }
1620 } else if (vnode_isreg(vp)) {
1621 return (intptr_t)1;
1622 } else {
1623 panic("Should never have an EVFILT_WRITE except for reg or fifo.");
1624 return 0;
1625 }
1626 }
1627
1628 /*
1629 * Determine whether this knote should be active
1630 *
1631 * This is kind of subtle.
1632 * --First, notice if the vnode has been revoked: if so, override hint
1633 * --EVFILT_READ knotes are checked no matter what the hint is
1634 * --Other knotes activate based on hint.
1635 * --If hint is revoke, set special flags and activate
1636 */
1637 static int
1638 filt_vnode(struct knote *kn, long hint)
1639 {
1640 vnode_t vp = (struct vnode *)kn->kn_hook;
1641 int activate = 0;
1642 long orig_hint = hint;
1643
1644 if (0 == hint) {
1645 vnode_lock(vp);
1646
1647 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
1648 /* Is recycled */
1649 hint = NOTE_REVOKE;
1650 }
1651 } else {
1652 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
1653 }
1654
1655 /* Special handling for vnodes that are in recycle or already gone */
1656 if (NOTE_REVOKE == hint) {
1657 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1658 activate = 1;
1659
1660 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
1661 kn->kn_fflags |= NOTE_REVOKE;
1662 }
1663 } else {
1664 switch(kn->kn_filter) {
1665 case EVFILT_READ:
1666 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));
1667
1668 if (kn->kn_data != 0) {
1669 activate = 1;
1670 }
1671 break;
1672 case EVFILT_WRITE:
1673 kn->kn_data = vnode_writable_space_count(vp);
1674
1675 if (kn->kn_data != 0) {
1676 activate = 1;
1677 }
1678 break;
1679 case EVFILT_VNODE:
1680 /* Check events this note matches against the hint */
1681 if (kn->kn_sfflags & hint) {
1682 kn->kn_fflags |= hint; /* Set which event occurred */
1683 }
1684 if (kn->kn_fflags != 0) {
1685 activate = 1;
1686 }
1687 break;
1688 default:
1689 panic("Invalid knote filter on a vnode!\n");
1690 }
1691 }
1692
1693 if (orig_hint == 0) {
1694 /*
1695 * Definitely need to unlock, may need to put
1696 */
1697 if (hint == 0) {
1698 vnode_put_locked(vp);
1699 }
1700 vnode_unlock(vp);
1701 }
1702
1703 return (activate);
1704 }