X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..bd504ef0e0b883cdd7917b73b3574eb9ce669905:/bsd/hfs/hfs_cnode.c

diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 8703ecb9b..65f2825d0 100644
--- a/bsd/hfs/hfs_cnode.c
+++ b/bsd/hfs/hfs_cnode.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 
 #include
@@ -59,11 +60,12 @@ static void hfs_reclaim_cnode(struct cnode *);
 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
 static int hfs_isordered(struct cnode *, struct cnode *);
 
+extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
+
 __inline__ int hfs_checkdeleted (struct cnode *cp) {
     return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
 }
-
 /*
  * Function used by a special fcntl() that decorates a cnode/vnode that
  * indicates it is backing another filesystem, like a disk image.
@@ -77,7 +79,7 @@ int hfs_set_backingstore (struct vnode *vp, int val) {
     int err = 0;
 
     cp = VTOC(vp);
-    if (vnode_isdir(vp)) {
+    if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
         return EINVAL;
     }
 
@@ -113,7 +115,7 @@ int hfs_is_backingstore (struct vnode *vp, int *val) {
     struct cnode *cp = NULL;
     int err = 0;
 
-    if (!vnode_isreg(vp)) {
+    if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
         *val = 0;
         return 0;
     }
@@ -240,8 +242,35 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
         (!ISSET(cp->c_flag, C_NOEXISTS)) &&
         (VTOF(vp)->ff_blocks) &&
         (reclaim == 0)) {
+        /*
+         * Note that if content protection is enabled, then this is where we will
+         * attempt to issue IOs for all dirty regions of this file.
+         *
+         * If we're called from hfs_vnop_inactive, all this means is at the time
+         * the logic for deciding to call this function, there were not any lingering
+         * mmap/fd references for this file.  However, there is nothing preventing the system
+         * from creating a new reference in between the time that logic was checked
+         * and we entered hfs_vnop_inactive.  As a result, the only time we can guarantee
+         * that there aren't any references is during vnop_reclaim.
+         */
         hfs_filedone(vp, ctx);
     }
+
+    /*
+     * We're holding the cnode lock now.  Stall behind any shadow BPs that may
+     * be involved with this vnode if it is a symlink.  We don't want to allow
+     * the blocks that we're about to release to be put back into the pool if there
+     * is pending I/O to them.
+     */
+    if (v_type == VLNK) {
+        /*
+         * This will block if the asynchronous journal flush is in progress.
+         * If this symlink is not being renamed over and doesn't have any open FDs,
+         * then we'll remove it from the journal's bufs below in kill_block.
+         */
+        buf_wait_for_shadow_io (vp, 0);
+    }
+
     /*
      * Remove any directory hints or cached origins
      */
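The backing-store hunks above widen the type check so that directories as well as regular files can be decorated; the "special fcntl()" the comment refers to is, as far as I can tell, F_SETBACKINGSTORE from sys/fcntl.h, and the interpretation of its third argument as the on/off value passed through to hfs_set_backingstore() is likewise an assumption. A minimal user-space sketch under those assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <path-to-disk-image>\n", argv[0]);
        return 1;
    }
    int fd = open(argv[1], O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }
#ifdef F_SETBACKINGSTORE
    /* Assumed mapping: this request lands in hfs_set_backingstore() on HFS+. */
    if (fcntl(fd, F_SETBACKINGSTORE, 1) == -1)
        perror("fcntl(F_SETBACKINGSTORE)");
#else
    fprintf(stderr, "F_SETBACKINGSTORE not available on this system\n");
#endif
    close(fd);
    return 0;
}
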
@@ -282,67 +311,151 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
     if ((v_type == VREG || v_type == VLNK) &&
         (cp->c_flag & C_DELETED) &&
         ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
-
+
         /* Truncate away our own fork data. (Case A, B, C above) */
         if (VTOF(vp)->ff_blocks != 0) {
-            /*
-             * Since we're already inside a transaction,
-             * tell hfs_truncate to skip the ubc_setsize.
+
+            /*
+             * SYMLINKS only:
+             *
+             * Encapsulate the entire change (including truncating the link) in
+             * nested transactions if we are modifying a symlink, because we know that its
+             * file length will be at most 4k, and we can fit both the truncation and
+             * any relevant bitmap changes into a single journal transaction.  We also want
+             * the kill_block code to execute in the same transaction so that any dirty symlink
+             * blocks will not be written. Otherwise, rely on
+             * hfs_truncate doing its own transactions to ensure that we don't blow up
+             * the journal.
+             */
+            if ((started_tr == 0) && (v_type == VLNK)) {
+                if (hfs_start_transaction(hfsmp) != 0) {
+                    error = EINVAL;
+                    goto out;
+                }
+                else {
+                    started_tr = 1;
+                }
+            }
+
+            /*
+             * At this point, we have decided that this cnode is
+             * suitable for full removal.  We are about to deallocate
+             * its blocks and remove its entry from the catalog.
+             * If it was a symlink, then it's possible that the operation
+             * which created it is still in the current transaction group
+             * due to coalescing.  Take action here to kill the data blocks
+             * of the symlink out of the journal before moving to
+             * deallocate the blocks.  We need to be in the middle of
+             * a transaction before calling buf_iterate like this.
+             *
+             * Note: we have to kill any potential symlink buffers out of
+             * the journal prior to deallocating their blocks.  This is so
+             * that we don't race with another thread that may be doing an
+             * an allocation concurrently and pick up these blocks. It could
+             * generate I/O against them which could go out ahead of our journal
+             * transaction.
+             */
+
+            if (hfsmp->jnl && vnode_islnk(vp)) {
+                buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
+            }
+
+            /*
              * This truncate call (and the one below) is fine from VNOP_RECLAIM's
              * context because we're only removing blocks, not zero-filling new
              * ones. The C_DELETED check above makes things much simpler.
              */
-            error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+            error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, 0, ctx);
             if (error) {
                 goto out;
             }
             truncated = 1;
+
+            /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
+            if (started_tr) {
+                hfs_end_transaction(hfsmp);
+                started_tr = 0;
+            }
         }
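The symlink-only bracketing just above follows the usual journal discipline: open the transaction only if one is not already in flight, do the small bounded change, and make sure started_tr is cleared and the transaction closed on every exit path. A compilable sketch of that shape with every journal hook stubbed out (the *_stub functions below are stand-ins, not kernel interfaces; only hfs_start_transaction/hfs_end_transaction are names taken from the hunk):

#include <errno.h>
#include <stdio.h>

struct hfsmount_stub { int journal_active; };

static int  hfs_start_transaction_stub(struct hfsmount_stub *hfsmp) { (void)hfsmp; return 0; }
static void hfs_end_transaction_stub(struct hfsmount_stub *hfsmp)   { (void)hfsmp; }
static int  do_bounded_symlink_change(struct hfsmount_stub *hfsmp)  { (void)hfsmp; return 0; }

static int truncate_symlink_like(struct hfsmount_stub *hfsmp)
{
    int started_tr = 0;
    int error = 0;

    /* Open the transaction only if we are not already inside one. */
    if (started_tr == 0) {
        if (hfs_start_transaction_stub(hfsmp) != 0) {
            error = EINVAL;
            goto out;
        }
        started_tr = 1;
    }

    /* The bounded work: at most one symlink block plus its bitmap bits. */
    error = do_bounded_symlink_change(hfsmp);

out:
    /* Close the transaction on every exit path, success or failure. */
    if (started_tr) {
        hfs_end_transaction_stub(hfsmp);
        started_tr = 0;
    }
    return error;
}

int main(void)
{
    struct hfsmount_stub mp = { 1 };
    printf("teardown sketch returned %d\n", truncate_symlink_like(&mp));
    return 0;
}
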
 
         /*
          * Truncate away the resource fork, if we represent the data fork and
          * it is the last fork.  That means, by definition, the rsrc fork is not in
-         * core.  So we bring it into core, and then truncate it away.
+         * core.  To avoid bringing a vnode into core for the sole purpose of deleting the
+         * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
+         * to get rid of the resource fork's data.  Note that because we are holding the
+         * cnode lock, it is impossible for a competing thread to create the resource fork
+         * vnode from underneath us while we do this.
          *
          * This is invoked via case A above only.
          */
         if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
-            struct vnode *rvp = NULLVP;
+            struct cat_lookup_buffer *lookup_rsrc = NULL;
+            struct cat_desc *desc_ptr = NULL;
+            lockflags = 0;
+
+            MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
+            if (lookup_rsrc == NULL) {
+                printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
+                error = ENOMEM;
+                goto out;
+            }
+            else {
+                bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
+            }
+
+            if (cp->c_desc.cd_namelen == 0) {
+                /* Initialize the rsrc descriptor for lookup if necessary*/
+                MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
+
+                lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
+                lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
+                lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+                lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
+
+                desc_ptr = &lookup_rsrc->lookup_desc;
+            }
+            else {
+                desc_ptr = &cp->c_desc;
+            }
+
+            lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+            error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL,
+                    (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
+
+            hfs_systemfile_unlock (hfsmp, lockflags);
 
-            /*
-             * It is safe for us to pass FALSE to the argument can_drop_lock
-             * on this call to hfs_vgetrsrc.  We know that the resource fork does not
-             * exist in core, so we'll have to go to the catalog to retrieve its
-             * information.  That will attach the resource fork vnode to our cnode.
-             */
-            error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE);
             if (error) {
+                FREE (lookup_rsrc, M_TEMP);
                 goto out;
             }
+
             /*
-             * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock().
-             *
-             * By bringing the vnode into core above, we may force hfs_vnop_reclaim
-             * to only partially finish if that's what called us.  Bringing the
-             * resource fork into core results in a new rsrc vnode that will get
-             * immediately marked for termination below.  It will get recycled/reclaimed
-             * as soon as possible, but that could cause another round of inactive and reclaim.
+             * Make the filefork in our temporary struct look like a real
+             * filefork.  Fill in the cp, sysfileinfo and rangelist fields..
              */
-            cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE;
-            error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+            rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
+            lookup_rsrc->lookup_fork.ff_cp = cp;
+
+            /*
+             * If there were no errors, then we have the catalog's fork information
+             * for the resource fork in question.  Go ahead and delete the data in it now.
+             */
+
+            error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
+            FREE(lookup_rsrc, M_TEMP);
+
             if (error) {
                 goto out;
             }
-
-            /*
-             * Note that the following call to vnode_recycle is safe from within the
-             * context of hfs_vnop_inactive or hfs_vnop_reclaim.  It is being invoked
-             * on the RSRC fork vp (which is not our current vnode) As such, we hold
-             * an iocount on it and vnode_recycle will just add the MARKTERM bit at this
-             * point.
+
+            /*
+             * This fileid's resource fork extents have now been fully deleted on-disk
+             * and this CNID is no longer valid.  At this point, we should be able to
+             * zero out cp->c_blocks to indicate there is no data left in this file.
              */
-            vnode_recycle(rvp); /* all done with this vnode */
+            cp->c_blocks = 0;
         }
     }
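The branch above has to cope with open-unlinked files: once an item has been moved into the private "deleted files" directory its cnode descriptor loses its name (cd_namelen == 0), so a temporary lookup name keyed on the fileid is synthesized for the catalog lookup, otherwise the cnode's own descriptor is used. A minimal sketch of that descriptor-selection step, with stand-in types and a hypothetical make_deleted_name() in place of the real MAKE_DELETED_NAME macro (the exact temporary-name format is not shown in the hunk and the "tmp" prefix below is an assumption):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins for the catalog descriptor and the cnode fields used above. */
struct cat_desc_stub {
    const char *cd_nameptr;
    size_t      cd_namelen;
    uint32_t    cd_parentcnid;
    uint32_t    cd_cnid;
};

struct cnode_stub {
    struct cat_desc_stub c_desc;
    uint32_t             c_fileid;
    uint32_t             c_cnid;
};

/* Hypothetical equivalent of MAKE_DELETED_NAME: name the item by fileid. */
static void make_deleted_name(char *buf, size_t len, uint32_t fileid)
{
    (void) snprintf(buf, len, "tmp%u", fileid);   /* prefix is an assumption */
}

/*
 * Pick the descriptor to hand to the catalog lookup: the cnode's own
 * descriptor while it still has a name, otherwise a synthesized one that
 * points into the private "deleted files" directory.
 */
static struct cat_desc_stub *
choose_lookup_desc(struct cnode_stub *cp, struct cat_desc_stub *tmp_desc,
                   char *namebuf, size_t namelen, uint32_t private_dir_cnid)
{
    if (cp->c_desc.cd_namelen == 0) {
        make_deleted_name(namebuf, namelen, cp->c_fileid);
        tmp_desc->cd_nameptr    = namebuf;
        tmp_desc->cd_namelen    = strlen(namebuf);
        tmp_desc->cd_parentcnid = private_dir_cnid;
        tmp_desc->cd_cnid       = cp->c_cnid;
        return tmp_desc;
    }
    return &cp->c_desc;
}

int main(void)
{
    char name[32];
    struct cat_desc_stub tmp = { 0 };
    struct cnode_stub cp = { .c_desc = { 0 }, .c_fileid = 123456, .c_cnid = 123456 };

    struct cat_desc_stub *dp = choose_lookup_desc(&cp, &tmp, name, sizeof(name), 16);
    printf("lookup name: %s (parent cnid %u)\n",
           dp->cd_nameptr ? dp->cd_nameptr : "(own descriptor)", dp->cd_parentcnid);
    return 0;
}
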
@@ -485,13 +598,44 @@ out:
         hfs_end_transaction(hfsmp);
         started_tr = 0;
     }
-
+
+#if 0
+#if CONFIG_PROTECT
+    /*
+     * cnode truncate lock and cnode lock are both held exclusive here.
+     *
+     * Go ahead and flush the keys out if this cnode is the last fork
+     * and it is not class F.  Class F keys should not be purged because they only
+     * exist in memory and have no persistent keys.  Only do this
+     * if we haven't already done it yet (maybe a vnode skipped inactive
+     * and went straight to reclaim).  This function gets called from both reclaim and
+     * inactive, so it will happen first in inactive if possible.
+     *
+     * We need to be mindful that all pending IO for this file has already been
+     * issued and completed before we bzero out the key.  This is because
+     * if it isn't, tossing the key here could result in garbage IO being
+     * written (by using the bzero'd key) if the writes are happening asynchronously.
+     *
+     * In addition, class A files may have already been purged due to the
+     * lock event occurring.
+     */
+    if (forkcount == 1) {
+        struct cprotect *entry = cp->c_cpentry;
+        if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
+            if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
+                cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
+                bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
+                bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
+            }
+        }
+    }
+#endif
+#endif
 
     return error;
 }
 
-
 /*
  * hfs_vnop_inactive
  *
@@ -600,6 +744,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
     struct rl_entry *invalid_range;
     off_t leof;
     u_int32_t blks, blocksize;
+    /* flags for zero-filling sparse ranges */
     int cluster_flags = IO_CLOSE;
     int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 
@@ -611,6 +756,25 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
     if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
         return (0);
 
+#if CONFIG_PROTECT
+    /*
+     * Figure out if we need to do synchronous IO.
+     *
+     * If the file represents a content-protected file, we may need
+     * to issue synchronous IO when we dispatch to the cluster layer.
+     * If we didn't, then the IO would go out to the disk asynchronously.
+     * If the vnode hits the end of inactive before getting reclaimed, the
+     * content protection keys would be wiped/bzeroed out, and we'd end up
+     * trying to issue the IO with an invalid key.  This will lead to file
+     * corruption.  IO_SYNC will force the cluster_push to wait until all IOs
+     * have completed (though they may be in the track cache).
+     */
+    if (cp_fs_protected(VTOVFS(vp))) {
+        cluster_flags |= IO_SYNC;
+        cluster_zero_flags |= IO_SYNC;
+    }
+#endif
+
     /*
      * If we are being invoked from F_SWAPDATAEXTENTS, then we
      * need to issue synchronous IO; Unless we are sure that all
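Both protected-file hunks above are about ordering: all dirty data must be fully issued (hence IO_SYNC on the cluster calls) before the per-file keys can be wiped at reclaim, otherwise late asynchronous writes would go out with a zeroed key. A rough user-space analogue of "make the data durable before discarding the state needed to write it" is fsync/F_FULLFSYNC; this sketch only illustrates that ordering idea and is not the kernel path itself:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    const char data[] = "data that must be on disk before its key goes away\n";
    int fd = open("example.dat", O_CREAT | O_WRONLY | O_TRUNC, 0644);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (write(fd, data, strlen(data)) < 0) {
        perror("write");
        close(fd);
        return 1;
    }
    /*
     * Push the data all the way out.  On OS X, F_FULLFSYNC also asks the
     * drive to drain its track cache; fsync() is the portable fallback.
     */
#ifdef F_FULLFSYNC
    if (fcntl(fd, F_FULLFSYNC) == -1)
        (void) fsync(fd);
#else
    (void) fsync(fd);
#endif
    close(fd);
    return 0;
}
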
@@ -654,8 +818,10 @@
     /*
      * Shrink the peof to the smallest size neccessary to contain the leof.
      */
-    if (blks < fp->ff_blocks)
+    if (blks < fp->ff_blocks) {
         (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
+    }
+
     hfs_unlock(cp);
     (void) cluster_push(vp, cluster_flags);
     hfs_lock(cp, HFS_FORCE_LOCK);
@@ -843,6 +1009,13 @@ hfs_getnewvnode(
     issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
     wantrsrc = flags & GNV_WANTRSRC;
 
+    /* Sanity check the vtype and mode */
+    if (vtype == VBAD) {
+        /* Mark the FS as corrupt and bail out */
+        hfs_mark_volume_inconsistent(hfsmp);
+        return (EINVAL);
+    }
+
     /* Zero out the out_flags */
     *out_flags = 0;
 
@@ -871,7 +1044,7 @@
     if (cp == NULL) {
         return (ENOENT);
     }
-
+
     /*
      * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
      * descriptor in the cnode as needed if the cnode represents a hardlink.
@@ -913,7 +1086,7 @@
             vnode_put (*vpp);
             *vpp = NULL;
         }
-
+
         /*
          * If we raced with VNOP_RECLAIM for this vnode, the hash code could
          * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
@@ -925,18 +1098,23 @@
             if (hflags) {
                 hfs_chashwakeup(hfsmp, cp, hflags);
             }
-
+
             *out_flags = GNV_CAT_ATTRCHANGED;
             return ERECYCLE;
         }
         else {
-            /* Otherwise, CNID != fileid. Go ahead and copy in the new descriptor */
+            /*
+             * Otherwise, CNID != fileid.  Go ahead and copy in the new descriptor.
+             *
+             * Replacing the descriptor here is fine because we looked up the item without
+             * a vnode in hand before.  If a vnode existed, its identity must be attached to this
+             * item.  We are not susceptible to the lookup fastpath issue at this point.
+             */
             replace_desc(cp, descp);
         }
         }
     }
-
     /* Check if we found a matching vnode */
     if (*vpp != NULL) {
         return (0);
@@ -1210,8 +1388,10 @@
     }
 
 #if CONFIG_PROTECT
-    if (!issystemfile && (*out_flags & GNV_NEW_CNODE))
+    /* Initialize the cp data structures.  The key should be in place now. */
+    if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
         cp_entry_init(cp, mp);
+    }
 #endif
 
     *vpp = vp;
@@ -1264,7 +1444,7 @@ hfs_reclaim_cnode(struct cnode *cp)
     }
 #endif
 #if CONFIG_PROTECT
-    cp_entry_destroy(cp);
+    cp_entry_destroy(&cp->c_cpentry);
 #endif
 
@@ -1303,7 +1483,6 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
     if (dvp && cnp) {
         int lookup = 0;
         struct cat_fork fork;
-
         bzero(&cndesc, sizeof(cndesc));
         cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
         cndesc.cd_namelen = cnp->cn_namelen;
@@ -1327,6 +1506,7 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
          */
         lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
+
         if ((lookup == 0) && (cnid == attr.ca_fileid)) {
             stillvalid = 1;
             *error = 0;
@@ -1380,7 +1560,7 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
             }
         }
     } else {
-        if (cat_idlookup(hfsmp, cnid, 0, NULL, NULL, NULL) == 0) {
+        if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) {
             stillvalid = 1;
             *error = 0;
         }
@@ -1394,6 +1574,7 @@ notvalid:
     return (stillvalid);
 }
 
+
 /*
  * Per HI and Finder requirements, HFS should add in the
  * date/time that a particular directory entry was added
 *
@@ -1408,11 +1589,11 @@
  */
 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
     u_int8_t *finfo = NULL;
-
+
     /* overlay the FinderInfo to the correct pointer, and advance */
     finfo = (u_int8_t*)attrp->ca_finderinfo;
     finfo = finfo + 16;
-
+
     /*
      * Make sure to write it out as big endian, since that's how
      * finder info is defined.
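The date-added value written above surfaces to user space as ATTR_CMN_ADDEDTIME through getattrlist(2) (available since OS X 10.7, to the best of my knowledge; the packed reply layout below follows the usual getattrlist convention of a u_int32_t length followed by the requested attributes). A minimal reader under those assumptions:

#include <sys/types.h>
#include <sys/attr.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

struct added_time_reply {
    u_int32_t       length;     /* total size returned by getattrlist */
    struct timespec date_added; /* ATTR_CMN_ADDEDTIME */
} __attribute__((aligned(4), packed));

int main(int argc, char *argv[])
{
    struct attrlist         al;
    struct added_time_reply reply;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <path>\n", argv[0]);
        return 1;
    }
    memset(&al, 0, sizeof(al));
    al.bitmapcount = ATTR_BIT_MAP_COUNT;
    al.commonattr  = ATTR_CMN_ADDEDTIME;

    if (getattrlist(argv[1], &al, &reply, sizeof(reply), 0) != 0) {
        perror("getattrlist");
        return 1;
    }
    printf("date added: %ld\n", (long)reply.date_added.tv_sec);
    return 0;
}
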
@@ -1427,27 +1608,27 @@ void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
     else if (S_ISDIR(attrp->ca_mode)) {
         struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
         extinfo->date_added = OSSwapHostToBigInt32(dateadded);
-        attrp->ca_recflags |= kHFSHasDateAddedMask;
+        attrp->ca_recflags |= kHFSHasDateAddedMask;
     }
-
     /* If it were neither directory/file, then we'd bail out */
     return;
 }
 
+
 u_int32_t hfs_get_dateadded (struct cnode *cp) {
     u_int8_t *finfo = NULL;
     u_int32_t dateadded = 0;
-
+
     if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
         /* Date added was never set.  Return 0. */
         return dateadded;
     }
-
-
+
+
     /* overlay the FinderInfo to the correct pointer, and advance */
     finfo = (u_int8_t*)cp->c_finderinfo;
     finfo = finfo + 16;
-
+
     /*
      * FinderInfo is written out in big endian... make sure to convert it to host
      * native before we use it.
@@ -1460,12 +1641,10 @@ u_int32_t hfs_get_dateadded (struct cnode *cp) {
         struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
         dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
     }
-
+
     return dateadded;
 }
-
-
 /*
  * Touch cnode times based on c_touch_xxx flags
  *
@@ -1546,7 +1725,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
         cp->c_flag |= C_MODIFIED;
         touchvol = 1;
     }
-
+
     if (cp->c_flag & C_NEEDS_DATEADDED) {
         hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
         cp->c_flag |= C_MODIFIED;
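As the two accessors above show, the date-added value lives in the extended half of the 32-byte Finder info (the "finfo + 16" step) and is stored big endian on disk, with kHFSHasDateAddedMask recording whether it was ever set. A small user-space round-trip of just that byte-order handling; the "+ 4" offset of date_added inside the extended info is an assumption, since the hunks only show the field being reached through the FndrExtended*Info structs:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libkern/OSByteOrder.h>   /* OSSwapHostToBigInt32 / OSSwapBigToHostInt32 */

/* 32-byte Finder info blob; the extended half starts at offset 16. */
static uint8_t finderinfo[32];

int main(void)
{
    uint32_t added = 1367930920;           /* some UNIX timestamp */
    uint32_t be    = OSSwapHostToBigInt32(added);
    uint8_t *ext   = finderinfo + 16;      /* same "finfo + 16" step as above */

    /* Assumed position of date_added inside the extended Finder info. */
    memcpy(ext + 4, &be, sizeof(be));

    uint32_t readback;
    memcpy(&readback, ext + 4, sizeof(readback));
    printf("date added round-trips as %u\n", OSSwapBigToHostInt32(readback));
    return 0;
}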