+ if (cur_id_index) {
+ reset_file_ids(hfsmp, fileids, cur_id_index);
+ cleared += cur_id_index;
+ cur_id_index = 0;
+ }
+
+ printf("hfs: cleared HotFileCache related bits on %d files out of %d (dircount %d)\n", cleared, filecount, dircount);
+
+ (void) BTScanTerminate(&scanstate, &data, &data, &data);
+
+out:
+ if (fileids)
+ FREE(fileids, M_TEMP);
+
+ if (iterator)
+ FREE(iterator, M_TEMP);
+
+ //
+ // If the hotfile btree exists, delete it. We need to open
+ // it to be able to delete it because we need the hfc_filevp
+ // for deletion.
+ //
+ error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1);
+ if (!error) {
+ printf("hfs: hotfile_reset: deleting existing hotfile btree\n");
+ hfc_btree_delete(hfsmp);
+ }
+
+ if (hfsmp->hfc_filevp) {
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ }
+
+ hfsmp->hfs_hotfile_blk_adjust = 0;
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks;
+}
+
+
+//
+// This should ONLY be called by hfs_recording_init() and the special fsctl.
+//
+// We assume that the hotfile btree is already opened.
+//
+static int
+hfs_hotfile_repin_files(struct hfsmount *hfsmp)
+{
+	BTreeIterator * iterator = NULL;
+	HotFileKey * key;
+	filefork_t * filefork;
+	int error = 0;
+	int bt_op;
+	enum hfc_stage stage;
+	uint32_t pinned_blocks;
+	uint32_t num_files=0, nrsrc=0;
+	uint32_t total_pinned=0;
+
+	if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || !hfsmp->hfc_filevp) {
+		//
+		// this is only meaningful if we're pinning hotfiles
+		// (as opposed to the regular form of hotfiles that
+		// get relocated to the hotfile zone)
+		//
+		return 0;
+	}
+
+#if HFC_VERBOSE
+	printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__);
+#endif
+
+	// Exclusive cnode lock on the hotfile b-tree file for the whole scan;
+	// a lock failure aborts the repin rather than blocking mount.
+	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+		return (EPERM);
+	}
+
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	if (iterator == NULL) {
+		hfs_unlock(VTOC(hfsmp->hfc_filevp));
+		return (ENOMEM);
+	}
+
+	// Mark the hotfile engine busy for the duration; the saved stage is
+	// restored (and waiters woken) before returning.
+	stage = hfsmp->hfc_stage;
+	hfsmp->hfc_stage = HFC_BUSY;
+
+	bt_op = kBTreeFirstRecord;
+
+	bzero(iterator, sizeof(*iterator));
+	key = (HotFileKey*) &iterator->key;
+
+	filefork = VTOF(hfsmp->hfc_filevp);
+	int lockflags;
+
+	while (1) {
+
+		lockflags = 0;
+		/*
+		 * Obtain the first record (ie the coldest one).
+		 */
+		if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
+			// no more records
+			error = 0;
+			break;
+		}
+		if (key->keyLength != HFC_KEYLENGTH) {
+			// printf("hfs: hotfiles_repin_files: invalid key length %d\n", key->keyLength);
+			error = EFTYPE;
+			break;
+		}
+		if (key->temperature == HFC_LOOKUPTAG) {
+			// ran into thread records in the hotfile btree
+			// (thread records sort after all temperature records, so we're done)
+			error = 0;
+			break;
+		}
+
+		//
+		// Just lookup the records in the catalog and pin the direct
+		// mapped extents. Faster than instantiating full vnodes
+		// (and thereby thrashing the system vnode cache).
+		//
+		struct cat_desc fdesc;
+		struct cat_attr attr;
+		struct cat_fork fork;
+		uint8_t forktype = 0;
+
+		lockflags = hfs_systemfile_lock(hfsmp, (SFL_CATALOG | SFL_EXTENTS), HFS_SHARED_LOCK);
+		/*
+		 * Snoop the cnode hash to find out if the item we want is in-core already.
+		 *
+		 * We largely expect this function to fail (the items we want are probably not in the hash).
+		 * we use the special variant which bails out as soon as it finds a vnode (even if it is
+		 * marked as open-unlinked or actually removed on-disk. If we find a vnode, then we
+		 * release the systemfile locks and go through the pin-vnode path instead.
+		 */
+		if (hfs_chash_snoop (hfsmp, key->fileID, 1, NULL, NULL) == 0) {
+			pinned_blocks = 0;
+
+			/* unlock immediately and go through the in-core path */
+			hfs_systemfile_unlock(hfsmp, lockflags);
+			lockflags = 0;
+
+			error = hfs_getvnode_and_pin (hfsmp, key->fileID, &pinned_blocks);
+			if (error) {
+				/* if ENOENT, then it was deleted in the catalog. Remove from our hotfiles tracking */
+				if (error == ENOENT) {
+					hfc_btree_delete_record(hfsmp, iterator, key);
+				}
+				/* other errors, just ignore and move on with life */
+			}
+			else { //!error
+				total_pinned += pinned_blocks;
+				num_files++;
+			}
+
+			goto next;
+		}
+
+		/* If we get here, we're still holding the systemfile locks */
+		error = cat_idlookup(hfsmp, key->fileID, 1, 0, &fdesc, &attr, &fork);
+		if (error) {
+			//
+			// this file system could have been mounted while booted from a
+			// different partition and thus the hotfile btree would not have
+			// been maintained.  thus a file that was hotfile cached could
+			// have been deleted while booted from a different partition which
+			// means we need to delete it from the hotfile btree.
+			//
+			// block accounting is taken care of at the end: we re-assign
+			// hfsmp->hfs_hotfile_freeblks based on how many blocks we actually
+			// pinned.
+			//
+			hfc_btree_delete_record(hfsmp, iterator, key);
+
+			goto next;
+		}
+
+		if (fork.cf_size == 0) {
+			// hmmm, the data is probably in the resource fork (aka a compressed file)
+			error = cat_idlookup(hfsmp, key->fileID, 1, 1, &fdesc, &attr, &fork);
+			if (error) {
+				hfc_btree_delete_record(hfsmp, iterator, key);
+				goto next;
+			}
+			forktype = 0xff;	// resource fork
+			nrsrc++;
+		}
+
+		pinned_blocks = 0;
+
+		/* Can't release the catalog /extents lock yet, we may need to go find the overflow blocks */
+		error = hfs_pin_extent_record (hfsmp, fork.cf_extents, &pinned_blocks);
+		if (error) {
+			goto next; //skip to next
+		}
+		/* add in the blocks from the inline 8 */
+		total_pinned += pinned_blocks;
+		pinned_blocks = 0;
+
+		/* Could this file have overflow extents? */
+		if (fork.cf_extents[kHFSPlusExtentDensity-1].startBlock) {
+			/* better pin them, too */
+			error = hfs_pin_overflow_extents (hfsmp, key->fileID, forktype, &pinned_blocks);
+			if (error) {
+				/* If we fail to pin all of the overflow extents, then just skip to the next file */
+				goto next;
+			}
+		}
+
+		num_files++;
+		if (pinned_blocks) {
+			/* now add in any overflow also */
+			total_pinned += pinned_blocks;
+		}
+
+	next:
+		// Common per-iteration cleanup: drop the systemfile locks if this
+		// iteration took them, then advance the iterator.
+		if (lockflags) {
+			hfs_systemfile_unlock(hfsmp, lockflags);
+			lockflags = 0;
+		}
+		bt_op = kBTreeNextRecord;
+
+	} /* end while */
+
+#if HFC_VERBOSE
+	printf("hfs: hotfiles_repin_files: re-pinned %d files (nrsrc %d, total pinned %d blks; freeblock %d, maxblocks %d, calculated free: %d)\n",
+	       num_files, nrsrc, total_pinned, hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks,
+	       hfsmp->hfs_hotfile_maxblks - total_pinned);
+#endif
+	//
+	// make sure this is accurate based on how many blocks we actually pinned
+	//
+	hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - total_pinned;
+
+	hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+	FREE(iterator, M_TEMP);
+	// Restore the saved stage and wake anyone waiting on the HFC_BUSY state.
+	hfsmp->hfc_stage = stage;
+	wakeup((caddr_t)&hfsmp->hfc_stage);
+	return (error);
+}
+
+/*
+ * Re-pin all hotfiles to the fast device, opening the hotfile b-tree
+ * on demand if it is not already open (and closing it again afterward
+ * if we were the ones who opened it).  Serialized by hfc_mutex.
+ */
+void
+hfs_repin_hotfiles(struct hfsmount *hfsmp)
+{
+	int error;
+	int opened_here = 0;
+
+	lck_mtx_lock(&hfsmp->hfc_mutex);
+
+	if (hfsmp->hfc_filevp == NULL) {
+		error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+		if (error) {
+			printf("hfs: failed to open the btree err=%d.  Unable to re-pin hotfiles.\n", error);
+			lck_mtx_unlock(&hfsmp->hfc_mutex);
+			return;
+		}
+		opened_here = 1;
+	}
+
+	/* Pin the b-tree file itself, then everything it records. */
+	hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel());
+
+	hfs_hotfile_repin_files(hfsmp);
+
+	if (opened_here) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+	}
+
+	lck_mtx_unlock(&hfsmp->hfc_mutex);
+}
+
+/*
+ * For a given file ID, find and pin all of its overflow extents to the underlying CS
+ * device. Assumes that the extents overflow b-tree is locked for the duration of this call.
+ *
+ * Emit the number of blocks pinned in output argument 'pinned'
+ *
+ * Return success or failure (errno) in return value.
+ *
+ */
+int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
+		uint8_t forktype, uint32_t *pinned) {
+
+	struct BTreeIterator *ext_iter = NULL;
+	ExtentKey *ext_key_ptr = NULL;
+	ExtentRecord ext_data;
+	FSBufferDescriptor btRecord;
+	uint16_t btRecordSize;
+	int error = 0;
+
+	/* running total of blocks pinned across all overflow records for this fork */
+	uint32_t pinned_blocks = 0;
+
+
+	MALLOC (ext_iter, struct BTreeIterator*, sizeof (struct BTreeIterator), M_TEMP, M_WAITOK);
+	if (ext_iter == NULL) {
+		return ENOMEM;
+	}
+	bzero (ext_iter, sizeof(*ext_iter));
+
+	BTInvalidateHint (ext_iter);
+	ext_key_ptr = (ExtentKey*)&ext_iter->key;
+	btRecord.bufferAddress = &ext_data;
+	btRecord.itemCount = 1;
+
+	/*
+	 * This is like when you delete a file; we don't actually need most of the search machinery because
+	 * we are going to need all of the extent records that belong to this file (for a given fork type),
+	 * so we might as well use a straight-up iterator.
+	 *
+	 * Position the B-Tree iterator at the first record with this file ID
+	 *
+	 * NOTE(review): startBlock 0 is searched so the iterator lands just
+	 * before the file's first overflow record; the loop below then steps
+	 * forward with kBTreeNextRecord.  Presumably an exact match at
+	 * startBlock 0 cannot occur for an overflow record -- confirm against
+	 * the extents-overflow key layout.
+	 */
+	btRecord.itemSize = sizeof (HFSPlusExtentRecord);
+	ext_key_ptr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength;
+	ext_key_ptr->hfsPlus.forkType = forktype;
+	ext_key_ptr->hfsPlus.pad = 0;
+	ext_key_ptr->hfsPlus.fileID = fileid;
+	ext_key_ptr->hfsPlus.startBlock = 0;
+
+	error = BTSearchRecord (VTOF(hfsmp->hfs_extents_vp), ext_iter, &btRecord, &btRecordSize, ext_iter);
+	if (error == btNotFound) {
+		/* empty b-tree, so that's ok. we'll fall out during error check below. */
+		error = 0;
+	}
+
+	while (1) {
+		uint32_t found_fileid;
+		uint32_t pblocks;
+
+		error = BTIterateRecord (VTOF(hfsmp->hfs_extents_vp), kBTreeNextRecord, ext_iter, &btRecord, &btRecordSize);
+		if (error) {
+			/* swallow it if it's btNotFound, otherwise just bail out */
+			if (error == btNotFound)
+				error = 0;
+			break;
+		}
+
+		found_fileid = ext_key_ptr->hfsPlus.fileID;
+		/*
+		 * We only do one fork type at a time. So if either the fork-type doesn't
+		 * match what we are looking for (resource or data), OR the file id doesn't match
+		 * which indicates that there's nothing more with this file ID as the key, then bail out
+		 */
+		if ((found_fileid != fileid) || (ext_key_ptr->hfsPlus.forkType != forktype)) {
+			error = 0;
+			break;
+		}
+
+		/* Otherwise, we now have an extent record.  Process and pin all of the file extents. */
+		pblocks = 0;
+		error = hfs_pin_extent_record (hfsmp, ext_data.hfsPlus, &pblocks);
+
+		if (error) {
+			break;
+		}
+		pinned_blocks += pblocks;
+
+		/* if 8th extent is empty, then bail out */
+		/* (a partially-filled record means this was the file's last overflow record) */
+		if (ext_data.hfsPlus[kHFSPlusExtentDensity-1].startBlock == 0) {
+			error = 0;
+			break;
+		}
+
+	} // end extent-getting loop
+
+	/* dump the iterator */
+	FREE (ext_iter, M_TEMP);
+
+	if (error == 0) {
+		/*
+		 * In the event that the file has no overflow extents, pinned_blocks
+		 * will never be updated, so we'll properly export 0 pinned blocks to caller
+		 */
+		*pinned = pinned_blocks;
+	}
+
+	return error;
+
+}
+
+
+/*
+ * Look up the vnode for 'fileid' and pin its blocks to the fast device.
+ * Reports the number of blocks pinned through 'pinned' (0 on failure).
+ * Returns 0 on success or an errno-style value (ENOENT if the file was
+ * deleted / open-unlinked, EPERM for non-regular files, EINVAL if the
+ * file is not marked fast-dev-pinned).
+ */
+static int
+hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned) {
+	struct vnode *vp;
+	uint32_t blocks_pinned;
+	int result;
+
+	*pinned = 0;
+
+	/*
+	 * Acquire the vnode for this file; on success the cnode comes back locked.
+	 * An open-unlinked file yields ENOENT here.
+	 */
+	result = hfs_vget(hfsmp, fileid, &vp, 0, 0);
+	if (result) {
+		return result;
+	}
+
+	/*
+	 * Symlinks that may have been inserted into the hotfile zone during a
+	 * previous OS are now stuck here.  We do not want to move them.
+	 */
+	if (!vnode_isreg(vp)) {
+		result = EPERM;
+		goto drop;
+	}
+
+	/* Only files already marked as fast-dev-pinned are eligible. */
+	if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+		result = EINVAL;
+		goto drop;
+	}
+
+	result = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &blocks_pinned, vfs_context_kernel());
+	if (result == 0) {
+		*pinned = blocks_pinned;
+	}
+
+drop:
+	hfs_unlock(VTOC(vp));
+	vnode_put(vp);
+
+	return result;
+}
+
+/*
+ * Pins an HFS Extent record to the underlying CoreStorage. Assumes that Catalog & Extents overflow
+ * B-trees are held locked, as needed.
+ *
+ * Returns the number of blocks pinned in the output argument 'pinned'
+ *
+ * Returns error status (0 || errno) in return value.
+ */
+static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned) {
+	uint32_t pb = 0;
+	int i;
+	/*
+	 * BUGFIX: 'error' was previously uninitialized.  If the record is
+	 * empty (extents[0].startBlock == 0) the loop below breaks before any
+	 * assignment and we would return an indeterminate value (undefined
+	 * behavior).  Callers pass possibly-empty fork extent records.
+	 */
+	int error = 0;
+
+	if (pinned == NULL) {
+		return EINVAL;
+	}
+	*pinned = 0;
+
+
+
+	/* iterate through the extents; the first empty slot terminates the record */
+	for ( i = 0; i < kHFSPlusExtentDensity; i++) {
+		if (extents[i].startBlock == 0) {
+			break;
+		}
+
+		error = hfs_pin_block_range (hfsmp, HFS_PIN_IT, extents[i].startBlock,
+					     extents[i].blockCount, vfs_context_kernel());
+
+		if (error) {
+			break;
+		}
+		pb += extents[i].blockCount;
+	}
+
+	/* report however many blocks were pinned before any failure */
+	*pinned = pb;
+
+	return error;
+}
+
+/*
+ * Consume an HFS Plus on-disk catalog record and pin its blocks
+ * to the underlying CS devnode.
+ *
+ * NOTE: This is an important distinction!
+ * This function takes in an HFSPlusCatalogFile* which is the actual
+ * 200-some-odd-byte on-disk representation in the Catalog B-Tree (not
+ * one of the run-time structs that we normally use.
+ *
+ * This assumes that the catalog and extents-overflow btrees
+ * are locked, at least in shared mode
+ */
+static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc) {
+	uint32_t pinned_blocks = 0;
+	HFSPlusForkData *forkdata;
+	int error = 0;
+	uint8_t forktype = 0;
+
+	/* 'rsrc' selects the resource fork (forktype 0xff) over the data fork (0) */
+	if (rsrc) {
+		forkdata = &cfp->resourceFork;
+		forktype = 0xff;
+	}
+	else {
+		forkdata = &cfp->dataFork;
+	}
+
+	uint32_t pblocks = 0;
+
+	/* iterate through the inline extents */
+	error = hfs_pin_extent_record (hfsmp, forkdata->extents, &pblocks);
+	if (error) {
+		return error;
+	}
+
+	pinned_blocks += pblocks;
+	pblocks = 0;
+
+	/* it may have overflow extents */
+	/* (a non-empty 8th inline extent implies more records in the overflow b-tree) */
+	if (forkdata->extents[kHFSPlusExtentDensity-1].startBlock != 0) {
+		error = hfs_pin_overflow_extents (hfsmp, cfp->fileID, forktype, &pblocks);
+	}
+	/* on overflow-pin failure pblocks remains 0, so the accounting below stays correct */
+	pinned_blocks += pblocks;
+
+	/* charge the pinned blocks against the hotfile free-space budget */
+	hfsmp->hfs_hotfile_freeblks -= pinned_blocks;
+
+	return error;
+}
+
+
+/*
+ *
+ */
+int
+hfs_recording_init(struct hfsmount *hfsmp)
+{
+ CatalogKey * keyp;
+ CatalogRecord * datap;
+ u_int32_t dataSize;
+ HFSPlusCatalogFile *filep;
+ BTScanState scanstate;
+ BTreeIterator * iterator = NULL;
+ FSBufferDescriptor record;
+ HotFileKey * key;
+ filefork_t * filefork;
+ u_int32_t data;
+ struct cat_attr cattr;
+ u_int32_t cnid;
+ int error = 0;
+ long starting_temp;
+
+ int started_tr = 0;
+ int started_scan = 0;
+
+ int inserted = 0; /* debug variables */
+ int filecount = 0;
+ int uncacheable = 0;
+
+ /*
+ * For now, only the boot volume is supported.
+ */
+ if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
+ hfsmp->hfc_stage = HFC_DISABLED;
+ return (EPERM);
+ }
+
+ /* We grab the HFC mutex even though we're not fully mounted yet, just for orderliness */
+ lck_mtx_lock (&hfsmp->hfc_mutex);
+
+ /*
+ * Tracking of hot files requires up-to-date access times.
+ * So if access time updates are disabled, then we disable
+ * hot files, too.
+ */
+ if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
+ hfsmp->hfc_stage = HFC_DISABLED;
+ lck_mtx_unlock (&hfsmp->hfc_mutex);
+ return EPERM;
+ }
+
+ //
+ // Check if we've been asked to suspend operation
+ //
+ cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-suspend", &cattr, NULL);
+ if (cnid != 0) {
+ printf("hfs: %s: %s: hotfiles explicitly disabled! remove /.hotfiles-suspend to re-enable\n", hfsmp->vcbVN, __FUNCTION__);
+ hfsmp->hfc_stage = HFC_DISABLED;
+ lck_mtx_unlock (&hfsmp->hfc_mutex);
+ return EPERM;
+ }
+
+ //
+ // Check if we've been asked to reset our state.
+ //
+ cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-reset", &cattr, NULL);
+ if (cnid != 0) {
+ hfs_hotfile_reset(hfsmp);
+ }
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ //
+ // Cooperative Fusion (CF) systems use different constants
+ // than traditional hotfile systems. These were picked after a bit of
+ // experimentation - we can cache many more files on the
+ // ssd in an CF system and we can do so more rapidly
+ // so bump the limits considerably (and turn down the
+ // duration so that it doesn't take weeks to adopt all
+ // the files).
+ //
+ hfc_default_file_count = 20000;
+ hfc_default_duration = 300; // 5min
+ hfc_max_file_count = 50000;
+ hfc_max_file_size = (512ULL * 1024ULL * 1024ULL);
+ }
+
+ /*
+ * If the Hot File btree exists then metadata zone is ready.
+ */
+ cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
+ if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
+ int recreate = 0;
+
+ if (hfsmp->hfc_stage == HFC_DISABLED)
+ hfsmp->hfc_stage = HFC_IDLE;
+ hfsmp->hfs_hotfile_freeblks = 0;
+
+ if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && cattr.ca_blocks > 0) {
+ //
+ // make sure the hotfile btree is pinned
+ //
+ error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+ if (!error) {
+ /* XXX: must fix hfs_pin_vnode too */
+ hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL, vfs_context_kernel());
+
+ } else {
+ printf("hfs: failed to open the btree err=%d. Recreating hotfile btree.\n", error);
+ recreate = 1;
+ }
+
+ hfs_hotfile_repin_files(hfsmp);
+
+ if (hfsmp->hfc_filevp) {
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ }
+
+ } else if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ // hmmm, the hotfile btree is zero bytes long? how odd. let's recreate it.
+ printf("hfs: hotfile btree is zero bytes long?! recreating it.\n");
+ recreate = 1;
+ }
+
+ if (!recreate) {
+ /* don't forget to unlock the mutex */
+ lck_mtx_unlock (&hfsmp->hfc_mutex);
+ return (0);
+ } else {
+ //
+ // open the hotfile btree file ignoring errors because
+ // we need the vnode pointer for hfc_btree_delete() to
+ // be able to do its work
+ //
+ error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1);
+ if (!error) {
+ // and delete it!
+ error = hfc_btree_delete(hfsmp);
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ }
+ }
+ }
+
+ printf("hfs: %s: %s: creating the hotfile btree\n", hfsmp->vcbVN, __FUNCTION__);
+ if (hfs_start_transaction(hfsmp) != 0) {
+ lck_mtx_unlock (&hfsmp->hfc_mutex);
+ return EINVAL;
+ }
+
+ /* B-tree creation must be journaled */
+ started_tr = 1;
+
+ error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
+ if (error) {
+#if HFC_VERBOSE
+ printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+ goto recording_init_out;
+ }
+
+ hfs_end_transaction (hfsmp);
+ started_tr = 0;
+ /*
+ * Do a journal flush + flush track cache. We have to ensure that the async I/Os have been issued to the media
+ * before proceeding.
+ */
+ hfs_flush (hfsmp, HFS_FLUSH_FULL);
+
+ /* now re-start a new transaction */
+ if (hfs_start_transaction (hfsmp) != 0) {
+ lck_mtx_unlock (&hfsmp->hfc_mutex);
+ return EINVAL;
+ }
+ started_tr = 1;
+
+ /*
+ * Open the Hot File B-tree file for writing.
+ */
+ if (hfsmp->hfc_filevp)
+ panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
+
+ error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+ if (error) {
+#if HFC_VERBOSE
+ printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+ goto recording_init_out;
+ }
+
+ /*
+ * This function performs work similar to namei; we must NOT hold the catalog lock while
+ * calling it. This will decorate catalog records as being pinning candidates. (no hotfiles work)
+ */
+ hfs_setup_default_cf_hotfiles(hfsmp);
+
+ /*
+ * now grab the hotfiles b-tree vnode/cnode lock first, as it is not classified as a systemfile.
+ */
+ if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+ error = EPERM;
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ /* zero it out to avoid pinning later on */
+ hfsmp->hfc_filevp = NULL;
+ goto recording_init_out;
+ }
+
+ MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+ if (iterator == NULL) {
+ error = ENOMEM;
+ hfs_unlock (VTOC(hfsmp->hfc_filevp));
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ /* zero it out to avoid pinning */
+ hfsmp->hfc_filevp = NULL;
+ goto recording_init_out;
+ }
+
+ bzero(iterator, sizeof(*iterator));
+ key = (HotFileKey*) &iterator->key;
+ key->keyLength = HFC_KEYLENGTH;
+
+ record.bufferAddress = &data;
+ record.itemSize = sizeof(u_int32_t);
+ record.itemCount = 1;
+
+#if HFC_VERBOSE
+ printf("hfs: Evaluating space for \"%s\" metadata zone... (freeblks %d)\n", HFSTOVCB(hfsmp)->vcbVN,
+ hfsmp->hfs_hotfile_freeblks);
+#endif
+
+ /*
+ * Get ready to scan the Catalog file. We explicitly do NOT grab the catalog lock because
+ * we're fully single-threaded at the moment (by virtue of being called during mount()),
+ * and if we have to grow the hotfile btree, then we would need to grab the catalog lock
+ * and if we take a shared lock here, it would deadlock (see <rdar://problem/21486585>)
+ *
+ * We already started a transaction so we should already be holding the journal lock at this point.
+ * Note that we have to hold the journal lock / start a txn BEFORE the systemfile locks.
+ */
+
+ error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+ kCatSearchBufferSize, &scanstate);
+ if (error) {
+ printf("hfs_recording_init: err %d BTScanInit\n", error);
+
+ /* drop the systemfile locks */
+ hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+
+ /* zero it out to avoid pinning */
+ hfsmp->hfc_filevp = NULL;
+ goto recording_init_out;
+ }
+
+ started_scan = 1;
+
+ filefork = VTOF(hfsmp->hfc_filevp);
+
+ starting_temp = random() % HF_TEMP_RANGE;
+
+ /*
+ * Visit all the catalog btree leaf records. We have to hold the catalog lock to do this.
+ *
+ * NOTE: The B-Tree scanner reads from the media itself. Under normal circumstances it would be
+ * fine to simply use b-tree routines to read blocks that correspond to b-tree nodes, because the
+ * block cache is going to ensure you always get the cached copy of a block (even if a journal
+ * txn has modified one of those blocks). That is NOT true when
+ * using the scanner. In particular, it will always read whatever is on-disk. So we have to ensure
+ * that the journal has flushed and that the async I/Os to the metadata files have been issued.
+ */
+ for (;;) {
+ error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
+ if (error) {
+ if (error == btNotFound)
+ error = 0;
+ else
+ printf("hfs_recording_init: err %d BTScanNext\n", error);
+ break;
+ }
+ if ((datap->recordType != kHFSPlusFileRecord) ||
+ (dataSize != sizeof(HFSPlusCatalogFile))) {
+ continue;
+ }
+ filep = (HFSPlusCatalogFile *)datap;
+ filecount++;
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ if (filep->flags & kHFSDoNotFastDevPinMask) {
+ uncacheable++;
+ }
+
+ //
+ // If the file does not have the FastDevPinnedMask set, we
+ // can ignore it and just go to the next record.
+ //
+ if ((filep->flags & kHFSFastDevPinnedMask) == 0) {
+ continue;
+ }
+ } else if (filep->dataFork.totalBlocks == 0) {
+ continue;
+ }
+
+ /*
+ * On a regular hdd, any file that has blocks inside
+ * the hot file space is recorded for later eviction.
+ *
+ * For now, resource forks are ignored.
+ *
+ * We don't do this on CF systems as there is no real
+ * hotfile area - we just pin/unpin blocks belonging to
+ * interesting files.
+ */
+ if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && !hotextents(hfsmp, &filep->dataFork.extents[0])) {
+ continue;
+ }
+ cnid = filep->fileID;
+
+ /* Skip over journal files. */
+ if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
+ continue;
+ }
+ /*
+ * XXX - need to skip quota files as well.
+ */
+
+ uint32_t temp;
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ int rsrc = 0;
+
+ temp = (uint32_t)starting_temp++;
+ if (filep->flags & kHFSAutoCandidateMask) {
+ temp += MAX_NORMAL_TEMP;
+ }
+
+ /* use the data fork by default */
+ if (filep->dataFork.totalBlocks == 0) {
+ /*
+ * but if empty, switch to rsrc as its likely
+ * a compressed file
+ */
+ rsrc = 1;
+ }
+
+ error = hfs_pin_catalog_rec (hfsmp, filep, rsrc);
+ if (error)
+ break;
+
+ } else {
+ temp = HFC_MINIMUM_TEMPERATURE;
+ }
+
+ /* Insert a hot file entry. */
+ key->keyLength = HFC_KEYLENGTH;
+ key->temperature = temp;
+ key->fileID = cnid;
+ key->forkType = 0;
+ data = 0x3f3f3f3f;
+ error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+ if (error) {
+ printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+ error = MacToVFSError(error);
+ break;
+ }
+
+ /* Insert the corresponding thread record. */
+ key->keyLength = HFC_KEYLENGTH;
+ key->temperature = HFC_LOOKUPTAG;
+ key->fileID = cnid;
+ key->forkType = 0;
+ data = temp;
+ error = BTInsertRecord(filefork, iterator, &record, record.itemSize);