xnu-2782.40.9.tar.gz

[apple/xnu.git] / bsd / vfs / vfs_vnops.c
diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c

index d7e2b5f1488a6bc89ef1416c23cde2d4b786b0b7..9b431080fc494d6bc088d6589a377615b9260b0f 100644 (file)
--- a/bsd/vfs/vfs_vnops.c
+++ b/bsd/vfs/vfs_vnops.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -111,6 +111,11 @@ int        ubc_setcred(struct vnode *, struct proc *);
  #include <security/mac_framework.h>
  #endif
  
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
+extern void    sigpup_attach_vnode(vnode_t); /* XXX */
  
  static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
  static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
@@ -131,8 +136,16 @@ static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
                         vfs_context_t ctx);
  #endif
  
-struct         fileops vnops =
-       { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
+const struct fileops vnops = {  /* fileops table wired to the vn_* handlers below; positional initializer, so entry order matters */
+       DTYPE_VNODE,    /* leading entry added by this change; the initializer it replaces had no such slot */
+       vn_read,
+       vn_write,
+       vn_ioctl,
+       vn_select,
+       vn_closefile,
+       vn_kqfilt_add,
+       NULL    /* last slot stays NULL, as in the replaced initializer */
+};
  
  struct  filterops vnode_filtops = { 
         .f_isfd = 1, 
@@ -173,12 +186,17 @@ vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
                 goto bad;
         }
  
-       /* call out to allow 3rd party notification of open. 
+       /* Call out to allow 3rd party notification of open. 
          * Ignore result of kauth_authorize_fileop call.
          */
+#if CONFIG_MACF
+       mac_vnode_notify_open(ctx, vp, fmode);
+#endif
         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
                                                    (uintptr_t)vp, 0);
  
+       sigpup_attach_vnode(vp);
+
         return 0;
  
  bad:
@@ -256,13 +274,7 @@ vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode,
         }
  #endif
  
-       /* 
-       * Unlock the fsnode (if locked) here so that we are free
-       * to drop the dvp iocount and prevent deadlock in build_path().
-       * nameidone() will still do the right thing later.
-       */
         vp = ndp->ni_vp;
-       namei_unlock_fsnode(ndp);
  
         if (*did_create) {
                 int     update_flags = 0;
@@ -379,7 +391,7 @@ again:
                 ndp->ni_op = OP_LINK;
  #endif
                 /* Inherit USEDVP, vnode_open() supported flags only */
-               ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
+               ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT);
                 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
                 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
  #if NAMEDRSRCFORK
@@ -402,6 +414,7 @@ continue_create_lookup:
                 if (vp == NULL) {
                         /* must have attributes for a new file */
                         if (vap == NULL) {
+                               vnode_put(dvp);
                                 error = EINVAL;
                                 goto out;
                         }
@@ -445,7 +458,8 @@ continue_create_lookup:
                         }
  
                         need_vnop_open = !did_open;
-               } else {
+               } 
+               else {
                         if (fmode & O_EXCL)
                                 error = EEXIST;
  
@@ -480,13 +494,14 @@ continue_create_lookup:
  
                         /* Fall through */
                 }
-       } else {
+       }
+    else {
                 /*
                  * Not O_CREAT
                  */
                 ndp->ni_cnd.cn_nameiop = LOOKUP;
                 /* Inherit USEDVP, vnode_open() supported flags only */
-               ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
+               ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT);
                 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
  #if NAMEDRSRCFORK
                 /* open calls are allowed for resource forks. */
@@ -555,6 +570,25 @@ continue_create_lookup:
                         }
                 }
  
+#if CONFIG_PROTECT
+               /* 
+                * Perform any content protection access checks prior to calling 
+                * into the filesystem, if the raw encrypted mode was not 
+                * requested.  
+                * 
+                * If the va_dataprotect_flags are NOT active, or if they are,
+                * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need 
+                * to perform the checks.
+                */
+               if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
+                               ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
+                       error = cp_handle_open (vp, fmode);     
+                       if (error) {
+                               goto bad;
+                       }
+               }
+#endif
+
                 error = VNOP_OPEN(vp, fmode, ctx);
                 if (error) {
                         goto bad;
@@ -657,6 +691,7 @@ int
  vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
  {
         int error;
+       int flusherror = 0;
  
  #if NAMEDRSRCFORK
         /* Sync data from resource fork shadow file if needed. */
@@ -664,7 +699,7 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
             (vp->v_parent != NULLVP) &&
             vnode_isshadow(vp)) {
                 if (flags & FWASWRITTEN) {
-                       (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
+                       flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx);
                 }
         }
  #endif
@@ -673,6 +708,18 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
         if (vnode_isspec(vp))
                 (void)vnode_rele_ext(vp, flags, 0);
  
+       /*
+        * On HFS, we flush when the last writer closes.  We do this
+        * because resource fork vnodes hold a reference on data fork
+        * vnodes and that will prevent them from getting VNOP_INACTIVE
+        * which will delay when we flush cached data.  In future, we
+        * might find it beneficial to do this for all file systems.
+        * Note that it's OK to access v_writecount without the lock
+        * in this context.
+        */
+       if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1)
+               VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
+
         error = VNOP_CLOSE(vp, flags, ctx);
  
  #if CONFIG_FSE
@@ -688,6 +735,9 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
         if (!vnode_isspec(vp))
                 (void)vnode_rele_ext(vp, flags, 0);
         
+       if (flusherror) {
+               error = flusherror;
+       }
         return (error);
  }
  
@@ -836,7 +886,7 @@ vn_rdwr_64(
  
         if (error == 0) {
                 if (rw == UIO_READ) {
-                       if (vnode_isswap(vp)) {
+                       if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) {
                                 error = vn_read_swapfile(vp, auio);
                         } else {
                                 error = VNOP_READ(vp, auio, ioflg, &context);
@@ -854,6 +904,35 @@ vn_rdwr_64(
         return (error);
  }
  
+static inline void     /* Set FG_OFF_LOCKED on fg, sleeping for as long as another thread already has it set. */
+vn_offset_lock(struct fileglob *fg)
+{
+       lck_mtx_lock_spin(&fg->fg_lock);        /* fg_lock covers the flag test-and-set below */
+       while (fg->fg_lflags & FG_OFF_LOCKED) { /* re-tested after every return from msleep */
+               fg->fg_lflags |= FG_OFF_LOCKWANT;       /* vn_offset_unlock() only calls wakeup() when this is set */
+               msleep(&fg->fg_lflags, &fg->fg_lock, PVFS | PSPIN,      /* sleep channel is the address of the flags word; fg_lock is handed to msleep */
+                   "fg_offset_lock_wait", 0);
+       }
+       fg->fg_lflags |= FG_OFF_LOCKED; /* flag was observed clear under fg_lock, so claim it */
+       lck_mtx_unlock(&fg->fg_lock);   /* mutex is dropped; FG_OFF_LOCKED stays set until vn_offset_unlock() */
+}
+
+static inline void     /* Clear FG_OFF_LOCKED on fg and wake sleepers if any thread recorded that it was waiting. */
+vn_offset_unlock(struct fileglob *fg)
+{
+       int lock_wanted = 0;    /* snapshot of FG_OFF_LOCKWANT taken while fg_lock is held */
+
+       lck_mtx_lock_spin(&fg->fg_lock);
+       if (fg->fg_lflags & FG_OFF_LOCKWANT) {
+               lock_wanted = 1;
+       }
+       fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT);    /* release, and reset the waiter hint in the same update */
+       lck_mtx_unlock(&fg->fg_lock);
+       if (lock_wanted) {
+               wakeup(&fg->fg_lflags); /* same channel vn_offset_lock() sleeps on; called after fg_lock is dropped */
+       }
+}
+
  /*
   * File table vnode read routine.
   */
@@ -861,8 +940,10 @@ static int
  vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
  {
         struct vnode *vp;
-       int error, ioflag;
+       int error;
+       int ioflag;
         off_t count;
+       int offset_locked = 0;
  
         vp = (struct vnode *)fp->f_fglob->fg_data;
         if ( (error = vnode_getwithref(vp)) ) {
@@ -877,16 +958,26 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         }
  #endif
  
-       ioflag = 0;
+       /* This signals to VNOP handlers that this read came from a file table read */
+       ioflag = IO_SYSCALL_DISPATCH;
+
         if (fp->f_fglob->fg_flag & FNONBLOCK)
                 ioflag |= IO_NDELAY;
         if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
-               ioflag |= IO_NOCACHE;
+           ioflag |= IO_NOCACHE;
+       if (fp->f_fglob->fg_flag & FENCRYPTED) {
+               ioflag |= IO_ENCRYPTED;
+       }
         if (fp->f_fglob->fg_flag & FNORDAHEAD)
-               ioflag |= IO_RAOFF;
+           ioflag |= IO_RAOFF;
  
-       if ((flags & FOF_OFFSET) == 0)
+       if ((flags & FOF_OFFSET) == 0) {
+               if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) {
+                       vn_offset_lock(fp->f_fglob);
+                       offset_locked = 1;
+               }
                 uio->uio_offset = fp->f_fglob->fg_offset;
+       }
         count = uio_resid(uio);
  
         if (vnode_isswap(vp)) {
@@ -895,8 +986,13 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         } else {
                 error = VNOP_READ(vp, uio, ioflag, ctx);
         }
-       if ((flags & FOF_OFFSET) == 0)
+       if ((flags & FOF_OFFSET) == 0) {
                 fp->f_fglob->fg_offset += count - uio_resid(uio);
+               if (offset_locked) {
+                       vn_offset_unlock(fp->f_fglob);
+                       offset_locked = 0;
+               }
+       }
  
         (void)vnode_put(vp);
         return (error);
@@ -915,6 +1011,7 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         int clippedsize = 0;
         int partialwrite=0;
         int residcount, oldcount;
+       int offset_locked = 0;
         proc_t p = vfs_context_proc(ctx);
  
         count = 0;
@@ -931,7 +1028,12 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         }
  #endif
  
-       ioflag = IO_UNIT;
+       /*
+        * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from
+        * a file table write
+        */
+       ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);
+
         if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
                 ioflag |= IO_APPEND;
         if (fp->f_fglob->fg_flag & FNONBLOCK)
@@ -940,6 +1042,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                 ioflag |= IO_NOCACHE;
         if (fp->f_fglob->fg_flag & FNODIRECT)
                 ioflag |= IO_NODIRECT;
+       if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
+               ioflag |= IO_SINGLE_WRITER;
  
         /*
          * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
@@ -954,6 +1058,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         }
  
         if ((flags & FOF_OFFSET) == 0) {
+               if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) {
+                       vn_offset_lock(fp->f_fglob);
+                       offset_locked = 1;
+               }
                 uio->uio_offset = fp->f_fglob->fg_offset;
                 count = uio_resid(uio);
         }
@@ -976,8 +1084,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                 }
                 if (clippedsize >= residcount) {
                         psignal(p, SIGXFSZ);
-                       vnode_put(vp);
-                       return (EFBIG);
+                       error = EFBIG;
+                       goto error_out;
                 }
                 partialwrite = 1;
                 uio_setresid(uio, residcount-clippedsize);
@@ -988,8 +1096,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                 if (p && (vp->v_type == VREG) &&
                 ((rlim_t)uio->uio_offset  >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
                 psignal(p, SIGXFSZ);
-               vnode_put(vp);
-               return (EFBIG);
+               error = EFBIG;
+               goto error_out;
         }
                 if (p && (vp->v_type == VREG) &&
                         ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
@@ -1013,6 +1121,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
                         fp->f_fglob->fg_offset = uio->uio_offset;
                 else
                         fp->f_fglob->fg_offset += count - uio_resid(uio);
+               if (offset_locked) {
+                       vn_offset_unlock(fp->f_fglob);
+                       offset_locked = 0;
+               }
         }
  
         /*
@@ -1035,6 +1147,13 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
         }
         (void)vnode_put(vp);
         return (error);
+
+error_out:
+       if (offset_locked) {
+               vn_offset_unlock(fp->f_fglob);
+       }
+       (void)vnode_put(vp);
+       return (error);
  }
  
  /*
@@ -1242,7 +1361,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
         off_t file_size;
         int error;
         struct vnode *ttyvp;
-       int funnel_state;
         struct session * sessp;
         
         if ( (error = vnode_getwithref(vp)) ) {
@@ -1306,12 +1424,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
                 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
  
                 if (error == 0 && com == TIOCSCTTY) {
-                       error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
-                       if (error != 0) {
-                               panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
-                       }
-
-                       funnel_state = thread_funnel_set(kernel_flock, TRUE);
                         sessp = proc_session(vfs_context_proc(ctx));
  
                         session_lock(sessp);
@@ -1320,10 +1432,6 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
                         sessp->s_ttyvid = vnode_vid(vp);
                         session_unlock(sessp);
                         session_rele(sessp);
-                       thread_funnel_set(kernel_flock, funnel_state);
-
-                       if (ttyvp)
-                               vnode_rele(ttyvp);
                 }
         }
  out:
@@ -1374,13 +1482,14 @@ vn_closefile(struct fileglob *fg, vfs_context_t ctx)
  
         if ( (error = vnode_getwithref(vp)) == 0 ) {
  
-               if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
+               if ((fg->fg_flag & FHASLOCK) &&
+                   FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
                         lf.l_whence = SEEK_SET;
                         lf.l_start = 0;
                         lf.l_len = 0;
                         lf.l_type = F_UNLCK;
  
-                       (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
+                       (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
                 }
                 error = vn_close(vp, fg->fg_flag, ctx);
  
@@ -1573,11 +1682,14 @@ static intptr_t
  vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
  {
         if (vnode_isfifo(vp)) {
+#if FIFO
                 int cnt;
                 int err = fifo_charcount(vp, &cnt);
                 if (err == 0) {
                         return (intptr_t)cnt;
-               } else {
+               } else 
+#endif
+               {
                         return (intptr_t)0;
                 }
         } else if (vnode_isreg(vp)) {
@@ -1610,11 +1722,14 @@ static intptr_t
  vnode_writable_space_count(vnode_t vp) 
  {
         if (vnode_isfifo(vp)) {
+#if FIFO
                 long spc;
                 int err = fifo_freespace(vp, &spc);
                 if (err == 0) {
                         return (intptr_t)spc;
-               } else {
+               } else 
+#endif
+               {
                         return (intptr_t)0;
                 }
         } else if (vnode_isreg(vp)) {