--- /dev/null
+/*
+ * Copyright (c) 2003-2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <libkern/OSAtomic.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/ubc.h>
+#include <sys/vnode.h>
+#include <sys/kauth.h>
+#include <sys/vm.h>
+
+#include "hfs.h"
+#include "hfs_endian.h"
+#include "hfs_format.h"
+#include "hfs_mount.h"
+#include "hfs_hotfiles.h"
+
+#include "BTreeScanner.h"
+
+
+#define HFC_DEBUG 0 /* non-zero compiles in the hf_lookup/hf_maxdepth/hf_printtree debug helpers */
+#define HFC_VERBOSE 0 /* non-zero enables the extra diagnostic printf()s in this file */
+
+
+/*
+ * Minimum post Tiger base time.
+ * Thu Mar 31 17:00:00 2005
+ *
+ * hfs_recording_start() treats a resumed hfc_timebase below this value
+ * as bogus and recomputes it from the timeout.
+ */
+#define HFC_MIN_BASE_TIME 0x424c8f00L
+
+/*
+ * Hot File List (runtime).
+ *
+ * hotfileinfo_t is the element type of hotfilelist_t.hfl_hotfile[].
+ */
+typedef struct hotfileinfo {
+ u_int32_t hf_fileid; /* presumably the file's catalog node ID -- TODO confirm */
+ u_int32_t hf_temperature;
+ u_int32_t hf_blocks; /* presumably the file size in allocation blocks -- TODO confirm */
+} hotfileinfo_t;
+
+typedef struct hotfilelist {
+ size_t hfl_size; /* size in bytes of this allocation; passed back to hfs_free() */
+ u_int32_t hfl_magic;
+ u_int32_t hfl_version;
+ time_t hfl_duration; /* duration of sample period */
+ int hfl_count; /* count of hot files recorded */
+ int hfl_next; /* next file to move */
+ int hfl_totalblocks; /* total hot file blocks */
+ int hfl_reclaimblks; /* blocks to reclaim in HFV */
+ u_int32_t hfl_spare[2];
+ hotfileinfo_t hfl_hotfile[1]; /* array of hot files; hfs_recording_stop() allocates (activefiles - 1) extra elements past this one */
+} hotfilelist_t;
+
+
+/*
+ * Hot File Entry (runtime).
+ *
+ * Element type of hotfile_data_t.entries[]. Unused entries are chained
+ * through 'right' on the hotfile_data_t free list (see
+ * hfs_recording_start()).
+ */
+typedef struct hotfile_entry {
+ struct hotfile_entry *left; /* presumably the left child in the in-memory binary tree -- TODO confirm */
+ struct hotfile_entry *right; /* free-list link; presumably also the right child in the tree -- TODO confirm */
+ u_int32_t fileid;
+ u_int32_t temperature;
+ u_int32_t blocks;
+} hotfile_entry_t;
+
+
+//
+// We cap the max temperature for non-system files to "MAX_NORMAL_TEMP"
+// so that they will always have a lower temperature than system (aka
+// "auto-cached") files. System files have MAX_NORMAL_TEMP added to
+// their temperature which produces two bands of files (all non-system
+// files will have a temp less than MAX_NORMAL_TEMP and all system
+// files will have a temp greater than MAX_NORMAL_TEMP).
+//
+// This puts non-system files on the left side of the hotfile btree
+// (and we start evicting from the left-side of the tree). The idea is
+// that we will evict non-system files more aggressively since their
+// working set changes much more dynamically than system files (which
+// are for the most part, static).
+//
+// NOTE: these values have to fit into a 32-bit int. We use a
+// value of 1-billion which gives a pretty broad range
+// and yet should not run afoul of any sign issues.
+//
+#define MAX_NORMAL_TEMP 1000000000
+#define HF_TEMP_RANGE MAX_NORMAL_TEMP
+
+
+//
+// These used to be defines of the hard-coded values. But if
+// we're on a cooperative fusion (CF) system we need to change
+// the values (which happens in hfs_recording_init()).
+//
+uint32_t hfc_default_file_count = 1000;
+uint32_t hfc_default_duration = (3600 * 60); /* 216000; presumably seconds (i.e. 60 hours) -- TODO confirm units */
+uint32_t hfc_max_file_count = 5000;
+uint64_t hfc_max_file_size = (10 * 1024 * 1024); /* 10 MiB if the unit is bytes -- TODO confirm */
+
+
+/*
+ * Hot File Recording Data (runtime).
+ *
+ * Allocated by hfs_recording_start() as one block: this header followed
+ * by hfc_maxfiles hotfile_entry_t slots in entries[].
+ */
+typedef struct hotfile_data {
+ size_t size; /* size in bytes of the whole allocation; passed back to hfs_free() */
+ struct hfsmount *hfsmp; /* owning mount */
+ long refcount;
+ u_int32_t activefiles; /* active number of hot files */
+ u_int32_t threshold; /* initialized to HFC_MINIMUM_TEMPERATURE */
+ u_int32_t maxblocks; /* initialized to HFC_MAXIMUM_FILESIZE / volume block size */
+ hotfile_entry_t *rootentry; /* NULL when no hot files have been recorded */
+ hotfile_entry_t *freelist; /* unused entries, chained through 'right' */
+ hotfile_entry_t *coldest;
+ hotfile_entry_t entries[];
+} hotfile_data_t;
+
+static int hfs_recording_start (struct hfsmount *); /* caller must hold hfc_mutex */
+static int hfs_recording_stop (struct hfsmount *); /* caller must hold hfc_mutex */
+
+/* Hotfiles pinning routines */
+static int hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned);
+static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned);
+static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc);
+
+/*
+ * Hot File Data recording functions (in-memory binary tree).
+ */
+static int hf_insert (hotfile_data_t *, hotfile_entry_t *);
+static void hf_delete (hotfile_data_t *, u_int32_t, u_int32_t);
+static hotfile_entry_t * hf_coldest (hotfile_data_t *);
+static hotfile_entry_t * hf_getnewentry (hotfile_data_t *);
+static void hf_getsortedlist (hotfile_data_t *, hotfilelist_t *); /* destroys the tree (see hfs_recording_stop()) */
+
+#if HFC_DEBUG
+static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t);
+static void hf_maxdepth(hotfile_entry_t *, int, int *);
+static void hf_printtree (hotfile_entry_t *);
+#endif /* HFC_DEBUG */
+
+/*
+ * Hot File misc support functions.
+ */
+static int hotfiles_collect (struct hfsmount *);
+static int hotfiles_age (struct hfsmount *);
+static int hotfiles_adopt (struct hfsmount *);
+static int hotfiles_evict (struct hfsmount *, vfs_context_t);
+static int hotfiles_refine (struct hfsmount *);
+static int hotextents(struct hfsmount *, HFSPlusExtentDescriptor *);
+static int hfs_addhotfile_internal(struct vnode *);
+static int hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp);
+
+
+/*
+ * Hot File Cluster B-tree (on disk) functions.
+ */
+static int hfc_btree_create (struct hfsmount *, unsigned int, unsigned int);
+static int hfc_btree_open (struct hfsmount *, struct vnode **);
+static int hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs);
+static int hfc_btree_close (struct hfsmount *, struct vnode *);
+static int hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key);
+static int hfc_btree_delete(struct hfsmount *hfsmp);
+static int hfc_comparekeys (HotFileKey *, HotFileKey *);
+
+
+char hfc_tag[] = "CLUSTERED HOT FILES B-TREE "; /* copied into HotFilesInfo.tag by save_btree_user_info() */
+
+
+/*
+ *========================================================================
+ * HOT FILE INTERFACE ROUTINES
+ *========================================================================
+ */
+
+/*
+ * Start recording the hottest files on a file system.
+ *
+ * Requires that the hfc_mutex be held.
+ *
+ * Discards any previous recording data and file list, establishes the
+ * sampling window (hfc_timebase / hfc_timeout) and allocates a fresh
+ * hotfile_data_t whose entries are all chained on its free list. While
+ * hfc_timebase is still 0 and the hot-file B-tree can be opened, a
+ * suspended recording saved in the B-tree's user data is resumed;
+ * otherwise the B-tree file is created if needed and a new period starts.
+ *
+ * Returns 0 with hfc_stage set to HFC_RECORDING, or:
+ *   EPERM  - volume is read-only, has no journal, or has no metadata zone
+ *   ENOSPC - fewer than 2 * hfs_hotfile_maxblks blocks are free
+ *   EBUSY  - hfc_stage is not HFC_IDLE
+ *   other  - error returned by hfc_btree_create() (hfc_stage back to HFC_IDLE)
+ */
+static int
+hfs_recording_start(struct hfsmount *hfsmp)
+{
+ hotfile_data_t *hotdata;
+ struct timeval tv;
+ int maxentries;
+ size_t size;
+ int i;
+ int error;
+
+ if ((hfsmp->hfs_flags & HFS_READ_ONLY) ||
+ (hfsmp->jnl == NULL) ||
+ (hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) {
+ return (EPERM);
+ }
+ if (HFSTOVCB(hfsmp)->freeBlocks < (2 * (u_int32_t)hfsmp->hfs_hotfile_maxblks)) {
+ return (ENOSPC);
+ }
+ if (hfsmp->hfc_stage != HFC_IDLE) {
+ return (EBUSY);
+ }
+ hfsmp->hfc_stage = HFC_BUSY; /* held until set to HFC_RECORDING (or HFC_IDLE on error) below */
+
+ if (hfsmp->hfc_recdata) {
+ hfs_free(hfsmp->hfc_recdata, hfsmp->hfc_recdata->size);
+ hfsmp->hfc_recdata = NULL;
+ }
+ if (hfsmp->hfc_filelist) {
+ hfs_free(hfsmp->hfc_filelist, hfsmp->hfc_filelist->hfl_size);
+ hfsmp->hfc_filelist = NULL;
+ }
+
+ microtime(&tv); /* Times are based on GMT. */
+
+ /*
+ * On first startup check for suspended recording.
+ */
+ if (hfsmp->hfc_timebase == 0 &&
+ hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) == 0) {
+ HotFilesInfo hotfileinfo;
+
+ if ((BTGetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo,
+ sizeof(hotfileinfo)) == 0) &&
+ (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) &&
+ (SWAP_BE32 (hotfileinfo.timeleft) > 0) &&
+ (SWAP_BE32 (hotfileinfo.timebase) > 0)) {
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ if (hfsmp->hfs_hotfile_freeblks == 0) {
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks);
+ }
+ hfsmp->hfc_maxfiles = 0x7fffffff; /* NOTE(review): reset to HFC_DEFAULT_FILE_COUNT below if this exceeds HFC_MAXIMUM_FILE_COUNT */
+ printf("hfs: %s: %s: hotfile freeblocks: %d, max: %d\n", hfsmp->vcbVN, __FUNCTION__,
+ hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks);
+ } else {
+ hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt);
+ }
+ hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase);
+ int timeleft = (int)SWAP_BE32(hotfileinfo.timeleft);
+ if (timeleft < 0 || timeleft > (int)(HFC_DEFAULT_DURATION*2)) {
+ // in case this field got botched, don't let it screw things up
+ // printf("hfs: hotfiles: bogus looking timeleft: %d\n", timeleft);
+ timeleft = HFC_DEFAULT_DURATION;
+ }
+ hfsmp->hfc_timeout = timeleft + tv.tv_sec ;
+ /* Fix up any bogus timebase values. */
+ if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) {
+ hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION;
+ }
+#if HFC_VERBOSE
+ printf("hfs: Resume recording hot files on %s (%d secs left (%d); timeout %ld)\n",
+ hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft), timeleft, hfsmp->hfc_timeout - tv.tv_sec);
+#endif
+ } else {
+ hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
+ hfsmp->hfc_timebase = tv.tv_sec + 1;
+ hfsmp->hfc_timeout = hfsmp->hfc_timebase + HFC_DEFAULT_DURATION;
+ }
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ } else {
+ struct cat_attr cattr;
+ u_int32_t cnid;
+
+ /*
+ * Make sure a btree file exists.
+ *
+ * NOTE(review): cattr is only examined when cnid == 0; confirm that
+ * GetFileInfo() initializes it when the lookup finds nothing.
+ */
+ cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
+ if ((cnid == 0) &&
+ !S_ISREG(cattr.ca_mode) &&
+ (error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT))) {
+ hfsmp->hfc_stage = HFC_IDLE;
+ wakeup((caddr_t)&hfsmp->hfc_stage);
+ return (error);
+ }
+#if HFC_VERBOSE
+ /* NOTE(review): hfc_maxfiles is printed here before it is assigned just below */
+ printf("hfs: begin recording hot files on %s (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+ hfsmp->vcbVN,
+ hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+ hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
+#endif
+ hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
+ hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION;
+
+ /* Reset time base. */
+ if (hfsmp->hfc_timebase == 0) {
+ hfsmp->hfc_timebase = tv.tv_sec + 1;
+ } else {
+ time_t cumulativebase;
+
+ cumulativebase = hfsmp->hfc_timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION); /* never let the base fall more than HFC_CUMULATIVE_CYCLES periods behind the new timeout */
+ hfsmp->hfc_timebase = MAX(hfsmp->hfc_timebase, cumulativebase);
+ }
+ }
+
+ if ((hfsmp->hfc_maxfiles == 0) ||
+ (hfsmp->hfc_maxfiles > HFC_MAXIMUM_FILE_COUNT)) {
+ hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT;
+ }
+ maxentries = hfsmp->hfc_maxfiles;
+
+ size = sizeof(hotfile_data_t) + maxentries * sizeof(hotfile_entry_t);
+ hotdata = hfs_mallocz(size);
+ hotdata->size = size;
+
+ for (i = 1; i < maxentries ; i++) /* chain every entry to its successor through 'right' to form the free list */
+ hotdata->entries[i-1].right = &hotdata->entries[i];
+
+ hotdata->freelist = &hotdata->entries[0];
+ /*
+ * Establish minimum temperature and maximum file size.
+ */
+ hotdata->threshold = HFC_MINIMUM_TEMPERATURE;
+ hotdata->maxblocks = HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize;
+ hotdata->hfsmp = hfsmp;
+
+ hfsmp->hfc_recdata = hotdata;
+ hfsmp->hfc_stage = HFC_RECORDING;
+ wakeup((caddr_t)&hfsmp->hfc_stage);
+ return (0);
+}
+
+/*
+ * Stop recording the hottest files on a file system.
+ *
+ * Requires that the hfc_mutex be held.
+ *
+ * Collects the final samples, ages the previously clustered files,
+ * converts the in-memory recording tree into a sorted hotfilelist_t
+ * (stored in hfsmp->hfc_filelist) and selects the next stage:
+ *   HFC_EVICTION - blocks must be reclaimed before adoption
+ *   HFC_ADOPTION - the recorded files can be adopted as-is
+ *   HFC_IDLE     - nothing was recorded, the list holds no blocks, or
+ *                  an error occurred
+ * The recording data is always released. When the next stage is
+ * HFC_EVICTION or HFC_ADOPTION the hot-file B-tree stays open in
+ * hfsmp->hfc_filevp.
+ *
+ * Returns EPERM if the volume is not in HFC_RECORDING, otherwise 0 or
+ * the error from hfc_btree_open(), hotfiles_age() or hotfiles_refine().
+ */
+static int
+hfs_recording_stop(struct hfsmount *hfsmp)
+{
+	hotfile_data_t *hotdata;
+	hotfilelist_t *listp;
+	struct timeval tv;
+	size_t size;
+	enum hfc_stage newstage = HFC_IDLE;
+	int error;
+
+	if (hfsmp->hfc_stage != HFC_RECORDING)
+		return (EPERM);
+
+	hfsmp->hfc_stage = HFC_BUSY;
+
+	hotfiles_collect(hfsmp);
+
+
+	/*
+	 * Convert hot file data into a simple file id list....
+	 *
+	 * then dump the sample data
+	 */
+#if HFC_VERBOSE
+	printf("hfs: end of hot file recording on %s\n", hfsmp->vcbVN);
+#endif
+	hotdata = hfsmp->hfc_recdata;
+	if (hotdata == NULL) {
+		/*
+		 * Nothing to evaluate. Do not leave the stage parked at
+		 * HFC_BUSY: nothing else would ever move it on, and anyone
+		 * sleeping on hfc_stage would never be woken.
+		 */
+		hfsmp->hfc_stage = HFC_IDLE;
+		wakeup((caddr_t)&hfsmp->hfc_stage);
+		return (0);
+	}
+	hfsmp->hfc_recdata = NULL;
+	hfsmp->hfc_stage = HFC_EVALUATION;
+	wakeup((caddr_t)&hfsmp->hfc_stage);
+
+#if HFC_VERBOSE
+	printf("hfs: curentries: %d\n", hotdata->activefiles);
+#endif
+	/*
+	 * If no hot files recorded then we're done.
+	 */
+	if (hotdata->rootentry == NULL) {
+		error = 0;
+		goto out;
+	}
+
+	/* Open the B-tree file for writing... */
+	if (hfsmp->hfc_filevp)
+		panic("hfs_recording_stop: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
+
+	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+	if (error) {
+		goto out;
+	}
+
+	/*
+	 * Age the previous set of clustered hot files.
+	 */
+	error = hotfiles_age(hfsmp);
+	if (error) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+		goto out;
+	}
+
+	/*
+	 * Create a sorted list of hottest files.
+	 *
+	 * hotfilelist_t already contains one hotfileinfo_t, hence the "- 1".
+	 */
+	size = sizeof(hotfilelist_t);
+	size += sizeof(hotfileinfo_t) * (hotdata->activefiles - 1);
+	listp = hfs_mallocz(size);
+	listp->hfl_size = size;
+
+	hf_getsortedlist(hotdata, listp);	/* NOTE: destroys hot file tree! */
+	microtime(&tv);
+	listp->hfl_duration = tv.tv_sec - hfsmp->hfc_timebase;
+	hfs_assert(!hfsmp->hfc_filelist);
+	hfsmp->hfc_filelist = listp;
+
+	/*
+	 * Account for duplicates.
+	 */
+	error = hotfiles_refine(hfsmp);
+	if (error) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+		goto out;
+	}
+
+	/*
+	 * Compute the amount of space to reclaim...
+	 *
+	 * NOTE(review): the comparison uses hfs_hotfile_cur_freeblks() while
+	 * the subtraction uses the raw hfs_hotfile_freeblks field; confirm
+	 * that the difference is intentional.
+	 */
+	if (listp->hfl_totalblocks > hfs_hotfile_cur_freeblks(hfsmp)) {
+		listp->hfl_reclaimblks =
+		    MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) -
+		    hfsmp->hfs_hotfile_freeblks;
+#if HFC_VERBOSE
+		printf("hfs_recording_stop: need to reclaim %d blocks\n", listp->hfl_reclaimblks);
+#endif
+		if (listp->hfl_reclaimblks)
+			newstage = HFC_EVICTION;
+		else
+			newstage = HFC_ADOPTION;
+	} else {
+		newstage = HFC_ADOPTION;
+	}
+
+	/* Nothing to adopt: close the B-tree and go straight back to idle. */
+	if (newstage == HFC_ADOPTION && listp->hfl_totalblocks == 0) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+		newstage = HFC_IDLE;
+	}
+out:
+#if HFC_VERBOSE
+	if (newstage == HFC_EVICTION)
+		printf("hfs: evicting coldest files\n");
+	else if (newstage == HFC_ADOPTION)
+		printf("hfs: adopting hotest files\n");
+#endif
+	hfs_free(hotdata, hotdata->size);
+
+	hfsmp->hfc_stage = newstage;
+	wakeup((caddr_t)&hfsmp->hfc_stage);
+	return (error);
+}
+
+/*
+ * Write the current recording parameters into the user-data area of the
+ * hot-file B-tree (hfsmp->hfc_filevp, which must be open).
+ *
+ * All multi-byte fields are stored big-endian. 'timeleft' is the number
+ * of seconds between now and hfc_timeout. On HFS_CS_HOTFILE_PIN volumes
+ * the used-block count is saved; otherwise the default maximum file
+ * count is saved. The result of BTSetUserData() is ignored.
+ */
+static void
+save_btree_user_info(struct hfsmount *hfsmp)
+{
+	HotFilesInfo hotfileinfo;
+	struct timeval tv;
+
+	/*
+	 * This structure goes to disk verbatim. Zero it first so that any
+	 * field not assigned below, and the bytes of 'tag' after the
+	 * terminating NUL (strlcpy does not pad), never carry kernel stack
+	 * contents onto the volume.
+	 */
+	bzero(&hotfileinfo, sizeof(hotfileinfo));
+
+	microtime(&tv);
+	hotfileinfo.magic = SWAP_BE32 (HFC_MAGIC);
+	hotfileinfo.version = SWAP_BE32 (HFC_VERSION);
+	hotfileinfo.duration = SWAP_BE32 (HFC_DEFAULT_DURATION);
+	hotfileinfo.timebase = SWAP_BE32 (hfsmp->hfc_timebase);
+	hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec);
+	hotfileinfo.threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
+	hotfileinfo.maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
+	if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+		hotfileinfo.usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfs_hotfile_cur_freeblks(hfsmp));
+#if HFC_VERBOSE
+		printf("hfs: %s: saving usedblocks = %d (timeleft: %d; timeout %ld)\n", hfsmp->vcbVN, (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks),
+		    SWAP_BE32(hotfileinfo.timeleft), hfsmp->hfc_timeout);
+#endif
+	} else {
+		hotfileinfo.maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
+	}
+	strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag);
+	(void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo));
+}
+
+/*
+ * Suspend recording the hottest files on a file system.
+ *
+ * Takes hfc_mutex itself. If a recording is in progress, the current
+ * recording parameters are saved into the hot-file B-tree's user data
+ * inside a transaction. In every case that gets past the initial
+ * HFC_DISABLED check, the B-tree vnode is closed if open, any recording
+ * data is freed, and hfc_stage becomes HFC_DISABLED.
+ *
+ * Returns 0 if already disabled, if nothing was being recorded, or on
+ * success; otherwise the error from hfc_btree_open(),
+ * hfs_start_transaction() or hfs_lock() that prevented the save.
+ */
+int
+hfs_recording_suspend(struct hfsmount *hfsmp)
+{
+	hotfile_data_t *hotdata = NULL;
+	int error;
+
+	if (hfsmp->hfc_stage == HFC_DISABLED)
+		return (0);
+
+	lck_mtx_lock(&hfsmp->hfc_mutex);
+
+	/*
+	 * XXX NOTE
+	 * A suspend can occur during eval/evict/adopt stage.
+	 * In that case we would need to write out info and
+	 * flush our HFBT vnode. Currently we just bail.
+	 */
+
+	hotdata = hfsmp->hfc_recdata;
+	if (hotdata == NULL || hfsmp->hfc_stage != HFC_RECORDING) {
+		error = 0;
+		goto out;
+	}
+	hfsmp->hfc_stage = HFC_BUSY;
+
+#if HFC_VERBOSE
+	printf("hfs: suspend hot file recording on %s\n", hfsmp->vcbVN);
+#endif
+	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+	if (error) {
+		printf("hfs_recording_suspend: err %d opening btree\n", error);
+		goto out;
+	}
+
+	/* Report, rather than swallow, a failure to save the state. */
+	error = hfs_start_transaction(hfsmp);
+	if (error) {
+		goto out;
+	}
+	error = hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+	if (error) {
+		goto end_transaction;
+	}
+
+	save_btree_user_info(hfsmp);
+
+	hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+end_transaction:
+	hfs_end_transaction(hfsmp);
+
+out:
+	if (hfsmp->hfc_filevp) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+	}
+	if (hotdata) {
+		hfs_free(hotdata, hotdata->size);
+		hfsmp->hfc_recdata = NULL;
+	}
+	hfsmp->hfc_stage = HFC_DISABLED;
+	wakeup((caddr_t)&hfsmp->hfc_stage);
+
+	lck_mtx_unlock(&hfsmp->hfc_mutex);
+	return (error);
+}
+
+
+/*
+ * Clear every hot-file related catalog flag on each of the given files.
+ *
+ * For each id in fileid_table[0 .. num_ids-1] the vnode is fetched, the
+ * file is un-pinned if it is currently pinned on an HFS_CS_HOTFILE_PIN
+ * volume, and the pinned / do-not-pin / candidate / auto-candidate bits
+ * are cleared and written back inside a transaction.
+ *
+ * Best effort: an id whose vnode cannot be obtained (for example a stale
+ * entry that yields ENOENT), or for which no transaction can be started,
+ * is skipped.
+ */
+static void
+reset_file_ids(struct hfsmount *hfsmp, uint32_t *fileid_table, int num_ids)
+{
+	int i;
+
+	for (i = 0; i < num_ids; i++) {
+		struct vnode *vp;
+
+		if (hfs_vget(hfsmp, fileid_table[i], &vp, 0, 0) != 0) {
+			continue;	/* stale or unobtainable entry, go to next */
+		}
+
+		// hfs_vget returns a locked cnode so no need to lock here
+
+		if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+			(void) hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, NULL);
+		}
+
+		/*
+		 * The updates to the catalog must be journaled. If the
+		 * transaction cannot be started, leave the catalog record
+		 * alone instead of updating it (and ending a transaction that
+		 * was never begun).
+		 */
+		if (hfs_start_transaction(hfsmp) != 0) {
+			hfs_unlock(VTOC(vp));
+			vnode_put(vp);
+			continue;
+		}
+
+		//
+		// turn off _all_ the hotfile related bits since we're resetting state
+		//
+		if (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) {
+			vnode_clearfastdevicecandidate(vp);
+		}
+
+		VTOC(vp)->c_attr.ca_recflags &= ~(kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask);
+		VTOC(vp)->c_flag |= C_MODIFIED;
+
+		hfs_update(vp, 0);
+
+		hfs_end_transaction(hfsmp);
+
+		hfs_unlock(VTOC(vp));
+		vnode_put(vp);
+	}
+}
+
+/*
+ * Mark the item at 'filename' as a fast-device candidate.
+ *
+ * 'filename' is a '/'-separated path relative to the volume root (no
+ * leading slash). Each component is looked up in turn with
+ * hfs_vnop_lookup(); the final item gets the kHFSFastDevCandidateMask
+ * and kHFSAutoCandidateMask record flags set and is written back inside
+ * a transaction.
+ *
+ * Returns 0 on success, EINVAL for a NULL path, ENOENT for an empty
+ * path, or the error from hfs_vfs_root(), hfs_vnop_lookup(), hfs_lock()
+ * or hfs_start_transaction().
+ */
+static int
+flag_hotfile(struct hfsmount *hfsmp, const char *filename)
+{
+	struct vnode *dvp = NULL, *fvp = NULL;
+	vfs_context_t ctx = vfs_context_kernel();
+	const char *orig_fname = filename;
+	const char *path_end;
+	const char *ptr;
+	struct cnode *cp;
+	size_t fname_len;
+	int error = 0;
+
+	if (filename == NULL) {
+		return EINVAL;
+	}
+
+	fname_len = strlen(filename);	// does NOT include the trailing '\0'
+	path_end = orig_fname + fname_len;
+
+	error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx);
+	if (error) {
+		return (error);
+	}
+
+	/* At this point, 'dvp' must be considered iocounted */
+	ptr = filename;
+
+	/*
+	 * Walk the path one component at a time. The loop is bounded by the
+	 * real end of the string so that a one-character final component
+	 * ("a", "dir/b") is looked up like any other.
+	 */
+	while (ptr < path_end) {
+		/* advance to the end of this component (a slash or end of string) */
+		while (ptr < path_end && *ptr != '/') {
+			ptr++;
+		}
+
+		struct componentname cname = {
+			.cn_nameiop = LOOKUP,
+			.cn_flags = ISLASTCN,
+			.cn_pnbuf = __DECONST(char *, orig_fname),
+			.cn_nameptr = __DECONST(char *, filename),
+			.cn_pnlen = fname_len,
+			.cn_namelen = ptr - filename
+		};
+
+		struct vnop_lookup_args ap = {
+			.a_dvp = dvp,
+			.a_vpp = &fvp,
+			.a_cnp = &cname,
+			.a_context = ctx
+		};
+
+		error = hfs_vnop_lookup(&ap);
+		if (error) {
+			/*
+			 * If 'dvp' is non-NULL, then it has an iocount. Make sure to release it
+			 * before bailing out. VNOP_LOOKUP could legitimately return ENOENT
+			 * if the item didn't exist or if we raced with a delete.
+			 */
+			if (dvp) {
+				vnode_put(dvp);
+				dvp = NULL;
+			}
+			return error;
+		}
+
+		/*
+		 * Done when nothing follows this component except, at most, a
+		 * single trailing slash. (fname_len >= 1 inside the loop, so
+		 * path_end - 1 stays within the string.)
+		 */
+		if (ptr >= path_end - 1) {
+			break;
+		}
+
+		//
+		// we've got a multi-part pathname so drop the ref on the dir,
+		// make dvp become what we just looked up, and advance over
+		// the slash character in the pathname to get to the next
+		// component
+		//
+		vnode_put(dvp);
+		dvp = fvp;
+		fvp = NULL;
+
+		filename = ++ptr;	// skip the slash character
+	}
+
+	if (fvp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	cp = VTOC(fvp);
+	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+		goto out;
+	}
+
+	/* The catalog update must be journaled; give up if it cannot be. */
+	error = hfs_start_transaction(hfsmp);
+	if (error) {
+		hfs_unlock(cp);
+		goto out;
+	}
+
+	cp->c_attr.ca_recflags |= (kHFSFastDevCandidateMask|kHFSAutoCandidateMask);
+	cp->c_flag |= C_MODIFIED;
+
+	hfs_update(fvp, 0);
+
+	hfs_end_transaction(hfsmp);
+
+	hfs_unlock(cp);
+
+out:
+	if (fvp) {
+		vnode_put(fvp);
+		fvp = NULL;
+	}
+
+	if (dvp) {
+		vnode_put(dvp);
+		dvp = NULL;
+	}
+
+	return error;
+}
+
+
+/*
+ * Flag the built-in set of system locations as hot-file candidates.
+ *
+ * Each path is handed to flag_hotfile(), which resolves it relative to
+ * the volume root. Failures are ignored, so a missing location simply
+ * stays unflagged.
+ */
+static void
+hfs_setup_default_cf_hotfiles(struct hfsmount *hfsmp)
+{
+	const char *default_paths[] = {
+		"usr",
+		"System",
+		"Applications",
+		"private/var/db/dyld"
+	};
+	const int path_count = (int)(sizeof(default_paths) / sizeof(char *));
+	int idx = 0;
+
+	while (idx < path_count) {
+		flag_hotfile(hfsmp, default_paths[idx]);
+		idx++;
+	}
+}
+
+
+#define NUM_FILE_RESET_IDS 4096 // so we allocate 16k to hold file-ids
+
+static void
+hfs_hotfile_reset(struct hfsmount *hfsmp)
+{ /*
+ * Reset all hot-file state on the volume.
+ *
+ * Scans every catalog leaf record and collects the ids of files and
+ * folders that have any of the pinned / do-not-pin / candidate /
+ * auto-candidate flags set (journal files excluded). The ids are
+ * gathered in batches of NUM_FILE_RESET_IDS and cleared through
+ * reset_file_ids() between scans. Afterwards the hot-file B-tree is
+ * deleted if it can be opened, and the hot-file block accounting is
+ * reset to "everything free".
+ */
+ CatalogKey * keyp;
+ CatalogRecord * datap;
+ u_int32_t dataSize;
+ BTScanState scanstate;
+ BTreeIterator * iterator = NULL;
+ FSBufferDescriptor record;
+ u_int32_t data;
+ u_int32_t cnid;
+ int error = 0;
+ uint32_t *fileids=NULL;
+ int cur_id_index = 0;
+
+ int cleared = 0; /* debug variables */
+ int filecount = 0;
+ int dircount = 0;
+
+#if HFC_VERBOSE
+ printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__);
+#endif
+
+ iterator = hfs_mallocz(sizeof(*iterator)); /* NOTE(review): allocated and freed but otherwise unused in this function */
+
+ fileids = hfs_malloc(NUM_FILE_RESET_IDS * sizeof(uint32_t));
+
+ record.bufferAddress = &data; /* NOTE(review): 'record' is filled in but never passed to anything below */
+ record.itemSize = sizeof(u_int32_t);
+ record.itemCount = 1;
+
+ /*
+ * Get ready to scan the Catalog file.
+ */
+ error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+ kCatSearchBufferSize, &scanstate);
+ if (error) {
+ printf("hfs_hotfile_reset: err %d BTScanInit\n", error);
+ goto out;
+ }
+
+ /*
+ * Visit all the catalog btree leaf records, clearing any that have the
+ * HotFileCached bit set.
+ */
+ for (;;) {
+ error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
+ if (error) {
+ if (error == btNotFound)
+ error = 0;
+ else
+ printf("hfs_hotfile_reset: err %d BTScanNext\n", error);
+ break;
+ }
+
+ if (datap->recordType == kHFSPlusFolderRecord && (dataSize == sizeof(HFSPlusCatalogFolder))) {
+ HFSPlusCatalogFolder *dirp = (HFSPlusCatalogFolder *)datap;
+
+ dircount++;
+
+ if ((dirp->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) {
+ continue;
+ }
+
+ cnid = dirp->folderID;
+ } else if ((datap->recordType == kHFSPlusFileRecord) && (dataSize == sizeof(HFSPlusCatalogFile))) {
+ HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)datap;
+
+ filecount++;
+
+ /*
+ * If the file doesn't have any of the HotFileCached bits set, ignore it.
+ */
+ if ((filep->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) {
+ continue;
+ }
+
+ cnid = filep->fileID;
+ } else {
+ continue;
+ }
+
+ /* Skip over journal files. */
+ if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) {
+ continue;
+ }
+
+ //
+ // Just record the cnid of the file for now. We will modify it separately
+ // because we can't modify the catalog while we're scanning it.
+ //
+ fileids[cur_id_index++] = cnid;
+ if (cur_id_index >= NUM_FILE_RESET_IDS) {
+ //
+ // We're over the limit of file-ids so we have to terminate this
+ // scan, go modify all the catalog records, then restart the scan.
+ // This is required because it's not permissible to modify the
+ // catalog while scanning it.
+ //
+ (void) BTScanTerminate(&scanstate, &data, &data, &data);
+
+ reset_file_ids(hfsmp, fileids, cur_id_index);
+ cleared += cur_id_index;
+ cur_id_index = 0;
+
+ // restart the scan (from the beginning; records already cleared no longer match the flag test above)
+ error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+ kCatSearchBufferSize, &scanstate);
+ if (error) {
+ printf("hfs_hotfile_reset: err %d BTScanInit\n", error);
+ goto out;
+ }
+ continue;
+ }
+ }
+
+ if (cur_id_index) { /* flush the final, partially filled batch */
+ reset_file_ids(hfsmp, fileids, cur_id_index);
+ cleared += cur_id_index;
+ cur_id_index = 0;
+ }
+
+ printf("hfs: cleared HotFileCache related bits on %d files out of %d (dircount %d)\n", cleared, filecount, dircount);
+
+ (void) BTScanTerminate(&scanstate, &data, &data, &data);
+
+out:
+ hfs_free(fileids, NUM_FILE_RESET_IDS * sizeof(uint32_t));
+ hfs_free(iterator, sizeof(*iterator));
+
+ //
+ // If the hotfile btree exists, delete it. We need to open
+ // it to be able to delete it because we need the hfc_filevp
+ // for deletion.
+ //
+ error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1); /* any scan error recorded in 'error' is overwritten here */
+ if (!error) {
+ printf("hfs: hotfile_reset: deleting existing hotfile btree\n");
+ hfc_btree_delete(hfsmp);
+ }
+
+ if (hfsmp->hfc_filevp) {
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ }
+
+ hfsmp->hfs_hotfile_blk_adjust = 0;
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks;
+}
+
+
+// Re-pin the extents of every file recorded in the hotfile btree, then recompute hfs_hotfile_freeblks from the number of blocks actually pinned.
+// This should ONLY be called by hfs_recording_init() and the special fsctl.
+// Returns 0 right away unless HFS_CS_HOTFILE_PIN is set and hfc_filevp is non-NULL; EPERM if the btree cnode cannot be locked; EFTYPE if a key has the wrong length.
+// We assume that the hotfile btree is already opened.
+// hfc_stage is set to HFC_BUSY for the duration of the walk and restored before returning.
+static int
+hfs_hotfile_repin_files(struct hfsmount *hfsmp)
+{
+ BTreeIterator * iterator = NULL;
+ HotFileKey * key;
+ filefork_t * filefork;
+ int error = 0;
+ int bt_op;
+ enum hfc_stage stage;
+ uint32_t pinned_blocks;
+ uint32_t num_files=0, nrsrc=0;
+ uint32_t total_pinned=0;
+
+ if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || !hfsmp->hfc_filevp) {
+ //
+ // this is only meaningful if we're pinning hotfiles
+ // (as opposed to the regular form of hotfiles that
+ // get relocated to the hotfile zone)
+ //
+ return 0;
+ }
+
+#if HFC_VERBOSE
+ printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__);
+#endif
+
+ if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+ return (EPERM);
+ }
+
+
+ iterator = hfs_mallocz(sizeof(*iterator));
+
+ stage = hfsmp->hfc_stage; /* remembered so it can be restored on exit */
+ hfsmp->hfc_stage = HFC_BUSY;
+
+ bt_op = kBTreeFirstRecord;
+
+ key = (HotFileKey*) &iterator->key; /* the key lives inside the iterator, so it tracks the current record */
+
+ filefork = VTOF(hfsmp->hfc_filevp);
+ int lockflags;
+
+ while (1) {
+
+ lockflags = 0;
+ /*
+ * Obtain the first record (ie the coldest one).
+ */
+ if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
+ // no more records
+ error = 0;
+ break;
+ }
+ if (key->keyLength != HFC_KEYLENGTH) {
+ // printf("hfs: hotfiles_repin_files: invalid key length %d\n", key->keyLength);
+ error = EFTYPE;
+ break;
+ }
+ if (key->temperature == HFC_LOOKUPTAG) {
+ // ran into thread records in the hotfile btree
+ error = 0;
+ break;
+ }
+
+ //
+ // Just lookup the records in the catalog and pin the direct
+ // mapped extents. Faster than instantiating full vnodes
+ // (and thereby thrashing the system vnode cache).
+ //
+ struct cat_desc fdesc;
+ struct cat_attr attr;
+ struct cat_fork fork;
+ uint8_t forktype = 0;
+
+ lockflags = hfs_systemfile_lock(hfsmp, (SFL_CATALOG | SFL_EXTENTS), HFS_SHARED_LOCK);
+ /*
+ * Snoop the cnode hash to find out if the item we want is in-core already.
+ *
+ * We largely expect this function to fail (the items we want are probably not in the hash).
+ * we use the special variant which bails out as soon as it finds a vnode (even if it is
+ * marked as open-unlinked or actually removed on-disk. If we find a vnode, then we
+ * release the systemfile locks and go through the pin-vnode path instead.
+ */
+ if (hfs_chash_snoop (hfsmp, key->fileID, 1, NULL, NULL) == 0) {
+ pinned_blocks = 0;
+
+ /* unlock immediately and go through the in-core path */
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+
+ error = hfs_getvnode_and_pin (hfsmp, key->fileID, &pinned_blocks);
+ if (error) {
+ /* if ENOENT, then it was deleted in the catalog. Remove from our hotfiles tracking */
+ if (error == ENOENT) {
+ hfc_btree_delete_record(hfsmp, iterator, key);
+ }
+ /* other errors, just ignore and move on with life */
+ }
+ else { //!error
+ total_pinned += pinned_blocks;
+ num_files++;
+ }
+
+ goto next;
+ }
+
+ /* If we get here, we're still holding the systemfile locks */
+ error = cat_idlookup(hfsmp, key->fileID, 1, 0, &fdesc, &attr, &fork);
+ if (error) {
+ //
+ // this file system could have been mounted while booted from a
+ // different partition and thus the hotfile btree would not have
+ // been maintained. thus a file that was hotfile cached could
+ // have been deleted while booted from a different partition which
+ // means we need to delete it from the hotfile btree.
+ //
+ // block accounting is taken care of at the end: we re-assign
+ // hfsmp->hfs_hotfile_freeblks based on how many blocks we actually
+ // pinned.
+ //
+ hfc_btree_delete_record(hfsmp, iterator, key);
+
+ goto next;
+ }
+
+ if (fork.cf_size == 0) {
+ // hmmm, the data is probably in the resource fork (aka a compressed file)
+ error = cat_idlookup(hfsmp, key->fileID, 1, 1, &fdesc, &attr, &fork);
+ if (error) {
+ hfc_btree_delete_record(hfsmp, iterator, key);
+ goto next;
+ }
+ forktype = 0xff; /* fork type handed to hfs_pin_overflow_extents() below for this resource-fork case */
+ nrsrc++;
+ }
+
+ pinned_blocks = 0;
+
+ /* Can't release the catalog /extents lock yet, we may need to go find the overflow blocks */
+ error = hfs_pin_extent_record (hfsmp, fork.cf_extents, &pinned_blocks);
+ if (error) {
+ goto next; //skip to next (nothing from this file is added to total_pinned)
+ }
+ /* add in the blocks from the inline 8 */
+ total_pinned += pinned_blocks;
+ pinned_blocks = 0;
+
+ /* Could this file have overflow extents? */
+ if (fork.cf_extents[kHFSPlusExtentDensity-1].startBlock) {
+ /* better pin them, too */
+ error = hfs_pin_overflow_extents (hfsmp, key->fileID, forktype, &pinned_blocks);
+ if (error) {
+ /* If we fail to pin all of the overflow extents, then just skip to the next file */
+ goto next;
+ }
+ }
+
+ num_files++;
+ if (pinned_blocks) {
+ /* now add in any overflow also */
+ total_pinned += pinned_blocks;
+ }
+
+ next:
+ if (lockflags) {
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ lockflags = 0;
+ }
+ bt_op = kBTreeNextRecord;
+
+ } /* end while */
+
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_repin_files: re-pinned %d files (nrsrc %d, total pinned %d blks; freeblock %d, maxblocks %d, calculated free: %d)\n",
+ num_files, nrsrc, total_pinned, hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks,
+ hfsmp->hfs_hotfile_maxblks - total_pinned);
+#endif
+ //
+ // make sure this is accurate based on how many blocks we actually pinned
+ //
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - total_pinned;
+
+ hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+ hfs_free(iterator, sizeof(*iterator));
+ hfsmp->hfc_stage = stage;
+ wakeup((caddr_t)&hfsmp->hfc_stage);
+ return (error);
+}
+
+/*
+ * Pin the hot-file B-tree itself and then re-pin every file it lists.
+ *
+ * Runs with hfc_mutex held for the whole operation. If the B-tree is
+ * not already open it is opened here and closed again before returning;
+ * if it cannot be opened the failure is logged and nothing is pinned.
+ */
+void
+hfs_repin_hotfiles(struct hfsmount *hfsmp)
+{
+	int opened_here = 0;
+
+	lck_mtx_lock(&hfsmp->hfc_mutex);
+
+	if (hfsmp->hfc_filevp == NULL) {
+		int open_err = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+
+		if (open_err) {
+			printf("hfs: failed to open the btree err=%d. Unable to re-pin hotfiles.\n", open_err);
+			goto unlock;
+		}
+		opened_here = 1;
+	}
+
+	hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL);
+
+	hfs_hotfile_repin_files(hfsmp);
+
+	/* only close what this call opened */
+	if (opened_here) {
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+	}
+
+unlock:
+	lck_mtx_unlock(&hfsmp->hfc_mutex);
+}
+
+/*
+ * For a given file ID, find and pin all of its overflow extents to the underlying CS
+ * device. Assumes that the extents overflow b-tree is locked for the duration of this call.
+ *
+ *   hfsmp     mount whose extents overflow b-tree (hfs_extents_vp) is walked
+ *   fileid    file ID whose extent records are wanted
+ *   forktype  fork type byte that the extent keys must carry; only records
+ *             of this fork are pinned
+ *   pinned    out: number of blocks pinned.  Written only when the function
+ *             returns 0; on failure the caller's value is left untouched.
+ *
+ * Returns 0 on success (including "no overflow extents found"), otherwise the
+ * error reported by the b-tree routines or by hfs_pin_extent_record().
+ */
+int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
+					 uint8_t forktype, uint32_t *pinned) {
+
+	struct BTreeIterator *ext_iter = NULL;
+	ExtentKey *ext_key_ptr = NULL;
+	ExtentRecord ext_data;
+	FSBufferDescriptor btRecord;
+	uint16_t btRecordSize;
+	int error = 0;
+
+	uint32_t pinned_blocks = 0;
+
+
+	ext_iter = hfs_mallocz(sizeof (*ext_iter));
+
+	BTInvalidateHint (ext_iter);
+	ext_key_ptr = (ExtentKey*)&ext_iter->key;
+	btRecord.bufferAddress = &ext_data;
+	btRecord.itemCount = 1;
+
+	/*
+	 * This is like when you delete a file; we don't actually need most of the search machinery because
+	 * we are going to need all of the extent records that belong to this file (for a given fork type),
+	 * so we might as well use a straight-up iterator.
+	 *
+	 * Position the B-Tree iterator at the first record with this file ID
+	 */
+	btRecord.itemSize = sizeof (HFSPlusExtentRecord);
+	ext_key_ptr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength;
+	ext_key_ptr->hfsPlus.forkType = forktype;
+	ext_key_ptr->hfsPlus.pad = 0;
+	ext_key_ptr->hfsPlus.fileID = fileid;
+	ext_key_ptr->hfsPlus.startBlock = 0;
+
+	error = BTSearchRecord (VTOF(hfsmp->hfs_extents_vp), ext_iter, &btRecord, &btRecordSize, ext_iter);
+	if (error == btNotFound) {
+		/*
+		 * No record with startBlock 0 for this file: that is fine, the
+		 * iteration below picks up whatever follows the search key.
+		 */
+		error = 0;
+	}
+
+	/*
+	 * Any other search failure is a real error.  Previously it was silently
+	 * overwritten by the first BTIterateRecord() call; now the loop is
+	 * skipped and the error is handed back to the caller.
+	 */
+	while (error == 0) {
+		uint32_t found_fileid;
+		uint32_t pblocks;
+
+		error = BTIterateRecord (VTOF(hfsmp->hfs_extents_vp), kBTreeNextRecord, ext_iter, &btRecord, &btRecordSize);
+		if (error) {
+			/* swallow it if it's btNotFound, otherwise just bail out */
+			if (error == btNotFound)
+				error = 0;
+			break;
+		}
+
+		found_fileid = ext_key_ptr->hfsPlus.fileID;
+		/*
+		 * We only do one fork type at a time. So if either the fork-type doesn't
+		 * match what we are looking for (resource or data), OR the file id doesn't match
+		 * which indicates that there's nothing more with this file ID as the key, then bail out
+		 */
+		if ((found_fileid != fileid) || (ext_key_ptr->hfsPlus.forkType != forktype)) {
+			error = 0;
+			break;
+		}
+
+		/* Otherwise, we now have an extent record. Process and pin all of the file extents. */
+		pblocks = 0;
+		error = hfs_pin_extent_record (hfsmp, ext_data.hfsPlus, &pblocks);
+
+		if (error) {
+			break;
+		}
+		pinned_blocks += pblocks;
+
+		/* if 8th extent is empty, then bail out */
+		if (ext_data.hfsPlus[kHFSPlusExtentDensity-1].startBlock == 0) {
+			error = 0;
+			break;
+		}
+
+	} // end extent-getting loop
+
+	/* dump the iterator */
+	hfs_free(ext_iter, sizeof(*ext_iter));
+
+	if (error == 0) {
+		/*
+		 * In the event that the file has no overflow extents, pinned_blocks
+		 * will never be updated, so we'll properly export 0 pinned blocks to caller
+		 */
+		*pinned = pinned_blocks;
+	}
+
+	return error;
+
+}
+
+
+/*
+ * Look up the vnode for 'fileid' and pin it.
+ *
+ * '*pinned' is zeroed on entry and receives the pinned block count only
+ * when hfs_pin_vnode() succeeds.
+ *
+ * Returns:
+ *   the hfs_vget() error if the vnode cannot be obtained,
+ *   EPERM   if the vnode is not a regular file,
+ *   EINVAL  if the cnode does not carry kHFSFastDevPinnedMask,
+ *   otherwise the result of hfs_pin_vnode().
+ */
+static int
+hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned) {
+	struct vnode *vp;
+	uint32_t nblocks;
+	int rc;
+
+	*pinned = 0;
+
+	/*
+	 * Acquire the vnode for this file. This returns a locked cnode on success
+	 */
+	rc = hfs_vget(hfsmp, fileid, &vp, 0, 0);
+	if (rc) {
+		/* It's possible the file was open-unlinked. In this case, we'll get ENOENT back. */
+		return rc;
+	}
+
+	if (!vnode_isreg(vp)) {
+		/*
+		 * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck
+		 * here. We do not want to move them.
+		 */
+		rc = EPERM;
+	} else if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+		/* not marked as pinned, so there is nothing to re-pin */
+		rc = EINVAL;
+	} else {
+		rc = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &nblocks);
+		if (rc == 0) {
+			*pinned = nblocks;
+		}
+	}
+
+	/* every path past a successful hfs_vget() drops the cnode lock and the iocount */
+	hfs_unlock(VTOC(vp));
+	vnode_put(vp);
+
+	return rc;
+
+}
+
+/*
+ * Pins an HFS Extent record to the underlying CoreStorage. Assumes that Catalog & Extents overflow
+ * B-trees are held locked, as needed.
+ *
+ * The extents are walked in order; the walk stops at the first extent whose
+ * startBlock is 0 or at the first hfs_pin_block_range() failure.
+ *
+ * 'pinned' receives the sum of the block counts of the extents that were
+ * pinned successfully (also when an error is returned part-way through).
+ *
+ * Returns EINVAL if 'pinned' is NULL, otherwise 0 or the
+ * hfs_pin_block_range() error.
+ */
+static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned) {
+	uint32_t total = 0;
+	int rc = 0;
+	int idx = 0;
+
+	if (pinned == NULL) {
+		return EINVAL;
+	}
+	*pinned = 0;
+
+	/* iterate through the extents until the record ends or an empty slot shows up */
+	while (idx < kHFSPlusExtentDensity && extents[idx].startBlock != 0) {
+		rc = hfs_pin_block_range (hfsmp, HFS_PIN_IT, extents[idx].startBlock,
+				extents[idx].blockCount);
+		if (rc) {
+			break;
+		}
+
+		total += extents[idx].blockCount;
+		idx++;
+	}
+
+	*pinned = total;
+
+	return rc;
+}
+
+/*
+ * Consume an HFS Plus on-disk catalog record and pin its blocks
+ * to the underlying CS devnode.
+ *
+ * NOTE: This is an important distinction!
+ * This function takes in an HFSPlusCatalogFile* which is the actual
+ * 200-some-odd-byte on-disk representation in the Catalog B-Tree (not
+ * one of the run-time structs that we normally use).
+ *
+ * 'rsrc' selects the fork: non-zero pins the resource fork (extent key
+ * fork type 0xff), zero pins the data fork (fork type 0).
+ *
+ * The number of blocks pinned (inline plus overflow extents) is subtracted
+ * from hfsmp->hfs_hotfile_freeblks, unless pinning the inline extents fails,
+ * in which case the function returns before touching the counter.
+ *
+ * This assumes that the catalog and extents-overflow btrees
+ * are locked, at least in shared mode
+ */
+static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc) {
+	HFSPlusForkData *fork = rsrc ? &cfp->resourceFork : &cfp->dataFork;
+	uint8_t forktype = rsrc ? 0xff : 0;
+	uint32_t inline_blocks = 0;
+	uint32_t overflow_blocks = 0;
+	int error;
+
+	/* the eight extents stored inline in the catalog record */
+	error = hfs_pin_extent_record (hfsmp, fork->extents, &inline_blocks);
+	if (error) {
+		return error;
+	}
+
+	/* a used last inline slot means the fork may continue in the overflow b-tree */
+	if (fork->extents[kHFSPlusExtentDensity-1].startBlock != 0) {
+		error = hfs_pin_overflow_extents (hfsmp, cfp->fileID, forktype, &overflow_blocks);
+	}
+
+	hfsmp->hfs_hotfile_freeblks -= inline_blocks + overflow_blocks;
+
+	return error;
+}
+
+
+/*
+ * Set up hot file recording for a volume.  The comments in the body state
+ * that this runs single-threaded, as part of mount().
+ *
+ * Outline:
+ *   - Hot files are disabled (hfc_stage = HFC_DISABLED, EPERM returned) when
+ *     the volume is not the root file system, when it is mounted with
+ *     MNT_NOATIME, or when "/.hotfile-suspend" exists.
+ *   - If "/.hotfile-reset" exists, hfs_hotfile_reset() is called.
+ *   - On HFS_CS_HOTFILE_PIN volumes the hot file tunables are raised.
+ *   - If the hot file B-tree file (HFC_FILENAME) already exists as a regular
+ *     file, it is re-pinned (HFS_CS_HOTFILE_PIN volumes only) and the function
+ *     returns 0.  If it cannot be opened, or is empty on such a volume, it is
+ *     deleted and rebuilt by the code that follows.
+ *   - Otherwise a new B-tree is created inside a transaction and the catalog
+ *     is scanned: every eligible file gets a hot file record plus a matching
+ *     thread (HFC_LOOKUPTAG) record, and on HFS_CS_HOTFILE_PIN volumes its
+ *     blocks are pinned as well.
+ *
+ * hfc_mutex is taken right after the root-volume check and is released on
+ * every return path after that point.
+ *
+ * Returns 0 on success, otherwise a non-zero error.
+ */
+int
+hfs_recording_init(struct hfsmount *hfsmp)
+{
+	CatalogKey * keyp;
+	CatalogRecord * datap;
+	u_int32_t dataSize;
+	HFSPlusCatalogFile *filep;
+	BTScanState scanstate;
+	BTreeIterator * iterator = NULL;
+	FSBufferDescriptor record;
+	HotFileKey * key;
+	filefork_t * filefork;
+	u_int32_t data;
+	struct cat_attr cattr;
+	u_int32_t cnid;
+	int error = 0;
+	long starting_temp;
+
+	int started_tr = 0;
+	int started_scan = 0;
+
+	int inserted = 0;  /* debug variables */
+	int filecount = 0;
+	int uncacheable = 0;
+
+	/*
+	 * For now, only the boot volume is supported.
+	 */
+	if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) {
+		hfsmp->hfc_stage = HFC_DISABLED;
+		return (EPERM);
+	}
+
+	/* We grab the HFC mutex even though we're not fully mounted yet, just for orderliness */
+	lck_mtx_lock (&hfsmp->hfc_mutex);
+
+	/*
+	 * Tracking of hot files requires up-to-date access times.
+	 * So if access time updates are disabled, then we disable
+	 * hot files, too.
+	 */
+	if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) {
+		hfsmp->hfc_stage = HFC_DISABLED;
+		lck_mtx_unlock (&hfsmp->hfc_mutex);
+		return EPERM;
+	}
+
+	//
+	// Check if we've been asked to suspend operation
+	//
+	cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-suspend", &cattr, NULL);
+	if (cnid != 0) {
+		// the message must name the same file that is looked up above
+		printf("hfs: %s: %s: hotfiles explicitly disabled! remove /.hotfile-suspend to re-enable\n", hfsmp->vcbVN, __FUNCTION__);
+		hfsmp->hfc_stage = HFC_DISABLED;
+		lck_mtx_unlock (&hfsmp->hfc_mutex);
+		return EPERM;
+	}
+
+	//
+	// Check if we've been asked to reset our state.
+	//
+	cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-reset", &cattr, NULL);
+	if (cnid != 0) {
+		hfs_hotfile_reset(hfsmp);
+	}
+
+	if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+		//
+		// Cooperative Fusion (CF) systems use different constants
+		// than traditional hotfile systems. These were picked after a bit of
+		// experimentation - we can cache many more files on the
+		// ssd in an CF system and we can do so more rapidly
+		// so bump the limits considerably (and turn down the
+		// duration so that it doesn't take weeks to adopt all
+		// the files).
+		//
+		hfc_default_file_count = 20000;
+		hfc_default_duration = 300; // 5min
+		hfc_max_file_count = 50000;
+		hfc_max_file_size = (512ULL * 1024ULL * 1024ULL);
+	}
+
+	/*
+	 * If the Hot File btree exists then metadata zone is ready.
+	 */
+	cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL);
+	if (cnid != 0 && S_ISREG(cattr.ca_mode)) {
+		int recreate = 0;
+
+		if (hfsmp->hfc_stage == HFC_DISABLED) {
+			hfsmp->hfc_stage = HFC_IDLE;
+		}
+		hfsmp->hfs_hotfile_freeblks = 0;
+
+		if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && cattr.ca_blocks > 0) {
+			//
+			// make sure the hotfile btree is pinned
+			//
+			error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+			if (!error) {
+				/* XXX: must fix hfs_pin_vnode too */
+				hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL);
+
+				//
+				// Re-pin the recorded files only when the btree is
+				// actually open: hfs_hotfile_repin_files() works on
+				// hfsmp->hfc_filevp, which is not valid after a
+				// failed open.
+				//
+				hfs_hotfile_repin_files(hfsmp);
+			} else {
+				printf("hfs: failed to open the btree err=%d. Recreating hotfile btree.\n", error);
+				recreate = 1;
+			}
+
+			if (hfsmp->hfc_filevp) {
+				(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+				hfsmp->hfc_filevp = NULL;
+			}
+
+		} else if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+			// hmmm, the hotfile btree is zero bytes long? how odd. let's recreate it.
+			printf("hfs: hotfile btree is zero bytes long?! recreating it.\n");
+			recreate = 1;
+		}
+
+		if (!recreate) {
+			/* don't forget to unlock the mutex */
+			lck_mtx_unlock (&hfsmp->hfc_mutex);
+			return (0);
+		} else {
+			//
+			// open the hotfile btree file ignoring errors because
+			// we need the vnode pointer for hfc_btree_delete() to
+			// be able to do its work
+			//
+			error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1);
+			if (!error) {
+				// and delete it!
+				error = hfc_btree_delete(hfsmp);
+				(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+				hfsmp->hfc_filevp = NULL;
+			}
+		}
+	}
+
+	printf("hfs: %s: %s: creating the hotfile btree\n", hfsmp->vcbVN, __FUNCTION__);
+	if (hfs_start_transaction(hfsmp) != 0) {
+		lck_mtx_unlock (&hfsmp->hfc_mutex);
+		return EINVAL;
+	}
+
+	/* B-tree creation must be journaled */
+	started_tr = 1;
+
+	error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
+	if (error) {
+#if HFC_VERBOSE
+		printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+		goto recording_init_out;
+	}
+
+	hfs_end_transaction (hfsmp);
+	started_tr = 0;
+	/*
+	 * Do a journal flush + flush track cache. We have to ensure that the async I/Os have been issued to the media
+	 * before proceeding.
+	 */
+	hfs_flush (hfsmp, HFS_FLUSH_FULL);
+
+	/* now re-start a new transaction */
+	if (hfs_start_transaction (hfsmp) != 0) {
+		lck_mtx_unlock (&hfsmp->hfc_mutex);
+		return EINVAL;
+	}
+	started_tr = 1;
+
+	/*
+	 * Open the Hot File B-tree file for writing.
+	 */
+	if (hfsmp->hfc_filevp)
+		panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
+
+	error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp);
+	if (error) {
+#if HFC_VERBOSE
+		printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
+#endif
+		goto recording_init_out;
+	}
+
+	/*
+	 * This function performs work similar to namei; we must NOT hold the catalog lock while
+	 * calling it. This will decorate catalog records as being pinning candidates. (no hotfiles work)
+	 */
+	hfs_setup_default_cf_hotfiles(hfsmp);
+
+	/*
+	 * now grab the hotfiles b-tree vnode/cnode lock first, as it is not classified as a systemfile.
+	 */
+	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+		error = EPERM;
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		/* zero it out to avoid pinning later on */
+		hfsmp->hfc_filevp = NULL;
+		goto recording_init_out;
+	}
+
+	iterator = hfs_mallocz(sizeof(*iterator));
+
+	key = (HotFileKey*) &iterator->key;
+	key->keyLength = HFC_KEYLENGTH;
+
+	record.bufferAddress = &data;
+	record.itemSize = sizeof(u_int32_t);
+	record.itemCount = 1;
+
+#if HFC_VERBOSE
+	printf("hfs: Evaluating space for \"%s\" metadata zone... (freeblks %d)\n", HFSTOVCB(hfsmp)->vcbVN,
+	       hfsmp->hfs_hotfile_freeblks);
+#endif
+
+	/*
+	 * Get ready to scan the Catalog file. We explicitly do NOT grab the catalog lock because
+	 * we're fully single-threaded at the moment (by virtue of being called during mount()),
+	 * and if we have to grow the hotfile btree, then we would need to grab the catalog lock
+	 * and if we take a shared lock here, it would deadlock (see <rdar://problem/21486585>)
+	 *
+	 * We already started a transaction so we should already be holding the journal lock at this point.
+	 * Note that we have to hold the journal lock / start a txn BEFORE the systemfile locks.
+	 */
+
+	error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0,
+	                         kCatSearchBufferSize, &scanstate);
+	if (error) {
+		printf("hfs_recording_init: err %d BTScanInit\n", error);
+
+		/* drop the systemfile locks */
+		hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+
+		/* zero it out to avoid pinning */
+		hfsmp->hfc_filevp = NULL;
+		goto recording_init_out;
+	}
+
+	started_scan = 1;
+
+	filefork = VTOF(hfsmp->hfc_filevp);
+
+	starting_temp = random() % HF_TEMP_RANGE;
+
+	/*
+	 * Visit all the catalog btree leaf records. We have to hold the catalog lock to do this.
+	 *
+	 * NOTE: The B-Tree scanner reads from the media itself. Under normal circumstances it would be
+	 * fine to simply use b-tree routines to read blocks that correspond to b-tree nodes, because the
+	 * block cache is going to ensure you always get the cached copy of a block (even if a journal
+	 * txn has modified one of those blocks). That is NOT true when
+	 * using the scanner. In particular, it will always read whatever is on-disk. So we have to ensure
+	 * that the journal has flushed and that the async I/Os to the metadata files have been issued.
+	 */
+	for (;;) {
+		error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize);
+		if (error) {
+			if (error == btNotFound)
+				error = 0;
+			else
+				printf("hfs_recording_init: err %d BTScanNext\n", error);
+			break;
+		}
+		if ((datap->recordType != kHFSPlusFileRecord) ||
+		    (dataSize != sizeof(HFSPlusCatalogFile))) {
+			continue;
+		}
+		filep = (HFSPlusCatalogFile *)datap;
+		filecount++;
+
+		if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+			if (filep->flags & kHFSDoNotFastDevPinMask) {
+				uncacheable++;
+			}
+
+			//
+			// If the file does not have the FastDevPinnedMask set, we
+			// can ignore it and just go to the next record.
+			//
+			if ((filep->flags & kHFSFastDevPinnedMask) == 0) {
+				continue;
+			}
+		} else if (filep->dataFork.totalBlocks == 0) {
+			continue;
+		}
+
+		/*
+		 * On a regular hdd, any file that has blocks inside
+		 * the hot file space is recorded for later eviction.
+		 *
+		 * For now, resource forks are ignored.
+		 *
+		 * We don't do this on CF systems as there is no real
+		 * hotfile area - we just pin/unpin blocks belonging to
+		 * interesting files.
+		 */
+		if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && !hotextents(hfsmp, &filep->dataFork.extents[0])) {
+			continue;
+		}
+		cnid = filep->fileID;
+
+		/* Skip over journal files and the hotfiles B-Tree file. */
+		if (cnid == hfsmp->hfs_jnlfileid
+		    || cnid == hfsmp->hfs_jnlinfoblkid
+		    || cnid == VTOC(hfsmp->hfc_filevp)->c_fileid) {
+			continue;
+		}
+		/*
+		 * XXX - need to skip quota files as well.
+		 */
+
+		uint32_t temp;
+
+		if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+			int rsrc = 0;
+
+			temp = (uint32_t)starting_temp++;
+			if (filep->flags & kHFSAutoCandidateMask) {
+				temp += MAX_NORMAL_TEMP;
+			}
+
+			/* use the data fork by default */
+			if (filep->dataFork.totalBlocks == 0) {
+				/*
+				 * but if empty, switch to rsrc as its likely
+				 * a compressed file
+				 */
+				rsrc = 1;
+			}
+
+			error = hfs_pin_catalog_rec (hfsmp, filep, rsrc);
+			if (error)
+				break;
+
+		} else {
+			temp = HFC_MINIMUM_TEMPERATURE;
+		}
+
+		/* Insert a hot file entry. */
+		key->keyLength = HFC_KEYLENGTH;
+		key->temperature = temp;
+		key->fileID = cnid;
+		key->forkType = 0;
+		data = 0x3f3f3f3f;
+		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+		if (error) {
+			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+			error = MacToVFSError(error);
+			break;
+		}
+
+		/* Insert the corresponding thread record. */
+		key->keyLength = HFC_KEYLENGTH;
+		key->temperature = HFC_LOOKUPTAG;
+		key->fileID = cnid;
+		key->forkType = 0;
+		data = temp;
+		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+		if (error) {
+			printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID);
+			error = MacToVFSError(error);
+			break;
+		}
+		inserted++;
+	} // end catalog iteration loop
+
+	save_btree_user_info(hfsmp);
+	(void) BTFlushPath(filefork);
+
+recording_init_out:
+
+	/*
+	 * Unlock first, then pin after releasing everything else.
+	 * Every path that arrives here with a non-NULL hfc_filevp holds its
+	 * cnode lock; the paths that failed earlier clear hfc_filevp first.
+	 */
+	if (hfsmp->hfc_filevp) {
+		hfs_unlock (VTOC(hfsmp->hfc_filevp));
+	}
+
+	if (started_scan) {
+		(void) BTScanTerminate (&scanstate, &data, &data, &data);
+	}
+
+	if (started_tr) {
+		hfs_end_transaction(hfsmp);
+	}
+
+#if HFC_VERBOSE
+	printf("hfs: %d files identified out of %d (freeblocks is now: %d)\n", inserted, filecount, hfsmp->hfs_hotfile_freeblks);
+	if (uncacheable) {
+		printf("hfs: %d files were marked as uncacheable\n", uncacheable);
+	}
+#endif
+
+	if (iterator)
+		hfs_free(iterator, sizeof(*iterator));
+
+	if (hfsmp->hfc_filevp) {
+		if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+			hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL);
+		}
+		(void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+		hfsmp->hfc_filevp = NULL;
+	}
+
+	if (error == 0)
+		hfsmp->hfc_stage = HFC_IDLE;
+
+	/* Finally, unlock the HFC mutex */
+	lck_mtx_unlock (&hfsmp->hfc_mutex);
+
+	return (error);
+}
+
+/*
+ * Use sync to perform occasional background work.
+ *
+ * Nothing happens while hfc_stage is zero.  Otherwise hfc_mutex is taken
+ * and one step is performed, chosen by the current stage:
+ *   HFC_IDLE       start a recording period
+ *   HFC_RECORDING  stop recording once the current time passes hfc_timeout
+ *   HFC_EVICTION   evict hot files
+ *   HFC_ADOPTION   adopt hot files
+ * Results of the individual steps are ignored; the function always returns 0.
+ */
+int
+hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
+{
+	struct timeval now;
+
+	if (!hfsmp->hfc_stage) {
+		return (0);
+	}
+
+	lck_mtx_lock(&hfsmp->hfc_mutex);
+
+	switch (hfsmp->hfc_stage) {
+	case HFC_IDLE:
+		(void) hfs_recording_start(hfsmp);
+		break;
+
+	case HFC_RECORDING:
+		microtime(&now);
+		if (now.tv_sec > hfsmp->hfc_timeout) {
+			(void) hfs_recording_stop(hfsmp);
+		}
+		break;
+
+	case HFC_EVICTION:
+		(void) hotfiles_evict(hfsmp, ctx);
+		break;
+
+	case HFC_ADOPTION:
+		(void) hotfiles_adopt(hfsmp);
+		break;
+
+	default:
+		/* every other stage has no periodic work */
+		break;
+	}
+
+	lck_mtx_unlock(&hfsmp->hfc_mutex);
+
+	return (0);
+}
+
+/*
+ * Add a hot file to the recording list.
+ *
+ * This can happen when a hot file gets reclaimed or at the
+ * end of the recording period for any active hot file.
+ *
+ * NOTE: Since both the data and resource fork can be hot,
+ * there can be two entries for the same file id.
+ *
+ * Note: the cnode is locked on entry.
+ *
+ * Outside of the HFC_RECORDING stage this is a no-op that returns 0;
+ * otherwise the work is done by hfs_addhotfile_internal() with hfc_mutex
+ * held, and its result is returned.
+ */
+int
+hfs_addhotfile(struct vnode *vp)
+{
+	hfsmount_t *mnt = VTOHFS(vp);
+	int rc = 0;
+
+	if (mnt->hfc_stage == HFC_RECORDING) {
+		lck_mtx_lock(&mnt->hfc_mutex);
+		rc = hfs_addhotfile_internal(vp);
+		lck_mtx_unlock(&mnt->hfc_mutex);
+	}
+
+	return (rc);
+}
+
+/*
+ * Report whether a process name belongs to one of the daemons whose I/O
+ * should not influence hot file decisions.
+ *
+ *   pname   NUL-terminated process name
+ *   maxlen  maximum number of characters compared by strncmp()
+ *
+ * Returns 1 when 'pname' matches one of the listed names, 0 otherwise.
+ */
+static int
+hf_ignore_process(const char *pname, size_t maxlen)
+{
+	static const char * const ignored[] = {
+		"mds",
+		"mdworker",
+		"mds_stores",
+		"makewhatis",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(ignored) / sizeof(ignored[0]); i++) {
+		if (strncmp(pname, ignored[i], maxlen) == 0) {
+			return 1;
+		}
+	}
+
+	return 0;
+
+}
+
+/*
+ * Consider a vnode for the in-memory hot file recording list.
+ *
+ * The vnode is ignored (0 is returned, nothing recorded) unless all of the
+ * following hold: the volume is in the HFC_RECORDING stage, the vnode is a
+ * regular non-system file, it is not a resource fork, recording data exists,
+ * the cnode is neither already pinned nor marked do-not-pin, and the cnode
+ * is not deleted.
+ *
+ * Two sets of criteria follow:
+ *   - HFS_CS_HOTFILE_PIN volume and a fast-device candidate vnode: empty
+ *     files are skipped unless compressed or flagged as a candidate, I/O
+ *     issued by the processes listed in hf_ignore_process() is ignored, and
+ *     the entry is keyed by file ID.
+ *   - everything else: the classic checks on bytes read, size, block count,
+ *     BSD flags and access time; the temperature is bytes read divided by
+ *     file size and must reach the recording threshold.
+ *
+ * Callers in this file invoke it with hfc_mutex held.  Always returns 0.
+ */
+static int
+hfs_addhotfile_internal(struct vnode *vp)
+{
+	hotfile_data_t *hotdata;
+	hotfile_entry_t *entry;
+	hfsmount_t *hfsmp;
+	cnode_t *cp;
+	filefork_t *ffp;
+	u_int32_t temperature;
+
+	hfsmp = VTOHFS(vp);
+	if (hfsmp->hfc_stage != HFC_RECORDING)
+		return (0);
+
+	/*
+	 * Only regular files are eligible for hotfiles addition.
+	 *
+	 * Symlinks were previously added to the list and may exist in
+	 * extant hotfiles regions, but no new ones will be added, and no
+	 * symlinks will now be relocated/evicted from the hotfiles region.
+	 */
+	if (!vnode_isreg(vp) || vnode_issystem(vp)) {
+		return (0);
+	}
+
+	/* Skip resource forks for now. */
+	if (VNODE_IS_RSRC(vp)) {
+		return (0);
+	}
+	if ((hotdata = hfsmp->hfc_recdata) == NULL) {
+		return (0);
+	}
+	ffp = VTOF(vp);
+	cp = VTOC(vp);
+
+	if (cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask)) {
+		// it's already a hotfile or can't be a hotfile...
+		return 0;
+	}
+
+	//
+	// dead files are definitely not worth caching.  (Directories and
+	// system vnodes were already rejected by the regular-file test above,
+	// so only the cnode state needs checking here.)
+	//
+	if (cp->c_flag & (C_DELETED | C_NOEXISTS)) {
+		return 0;
+	}
+
+	if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && vnode_isfastdevicecandidate(vp)) {
+		//
+		// On cooperative fusion (CF) systems we have different criteria for whether something
+		// can be pinned to the ssd.
+		//
+		if (ffp->ff_blocks == 0 && !(cp->c_bsdflags & UF_COMPRESSED) && !(cp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) {
+			//
+			// empty files aren't worth caching but compressed ones might be, as are
+			// newly created files that live in WorthCaching directories...
+			//
+			return 0;
+		}
+
+		char pname[256];
+		pname[0] = '\0';
+		proc_selfname(pname, sizeof(pname));
+		if (hf_ignore_process(pname, sizeof(pname))) {
+			// ignore i/o's from certain system daemons
+			return 0;
+		}
+
+		temperature = cp->c_fileid; // in memory we just keep it sorted by file-id
+	} else {
+		// the normal hard drive based hotfile checks
+		if ((ffp->ff_bytesread == 0) ||
+		    (ffp->ff_blocks == 0) ||
+		    (ffp->ff_size == 0) ||
+		    (ffp->ff_blocks > hotdata->maxblocks) ||
+		    (cp->c_bsdflags & (UF_NODUMP | UF_COMPRESSED)) ||
+		    (cp->c_atime < hfsmp->hfc_timebase)) {
+			return (0);
+		}
+
+		temperature = ffp->ff_bytesread / ffp->ff_size;
+		if (temperature < hotdata->threshold) {
+			return (0);
+		}
+	}
+
+	/*
+	 * If there is room or this file is hotter than
+	 * the coldest one then add it to the list.
+	 *
+	 */
+	if ((hotdata->activefiles < hfsmp->hfc_maxfiles) ||
+	    (hotdata->coldest == NULL) ||
+	    (temperature >= hotdata->coldest->temperature)) {
+		++hotdata->refcount;
+		entry = hf_getnewentry(hotdata);
+		entry->temperature = temperature;
+		entry->fileid = cp->c_fileid;
+		//
+		// if ffp->ff_blocks is zero, it might be compressed so make sure we record
+		// that there's at least one block.
+		//
+		entry->blocks = ffp->ff_blocks ? ffp->ff_blocks : 1;
+		if (hf_insert(hotdata, entry) == EEXIST) {
+			// entry is already present, don't need to add it again;
+			// hand the unused entry back to the free list
+			entry->right = hotdata->freelist;
+			hotdata->freelist = entry;
+		}
+		--hotdata->refcount;
+	}
+
+	return (0);
+}
+
+/*
+ * Remove a hot file from the recording list.
+ *
+ * This can happen when a hot file becomes
+ * an active vnode (active hot files are
+ * not kept in the recording list until the
+ * end of the recording period).
+ *
+ * Note: the cnode is locked on entry.
+ *
+ * The cheap eligibility tests run without hfc_mutex; the stage is then
+ * re-checked under the mutex before the list is touched.  Always returns 0.
+ */
+int
+hfs_removehotfile(struct vnode *vp)
+{
+	hfsmount_t *hfsmp = VTOHFS(vp);
+	hotfile_data_t *recdata;
+	filefork_t *fork;
+	cnode_t *cnode;
+	u_int32_t temp;
+
+	if (hfsmp->hfc_stage != HFC_RECORDING) {
+		return (0);
+	}
+
+	if (!vnode_isreg(vp) || vnode_issystem(vp)) {
+		return (0);
+	}
+
+	fork = VTOF(vp);
+	cnode = VTOC(vp);
+
+	/* files that were never read, are empty, or are older than the time base cannot be on the list */
+	if (fork->ff_bytesread == 0 ||
+	    fork->ff_blocks == 0 ||
+	    fork->ff_size == 0 ||
+	    cnode->c_atime < hfsmp->hfc_timebase) {
+		return (0);
+	}
+
+	lck_mtx_lock(&hfsmp->hfc_mutex);
+
+	if (hfsmp->hfc_stage == HFC_RECORDING &&
+	    (recdata = hfsmp->hfc_recdata) != NULL) {
+		temp = fork->ff_bytesread / fork->ff_size;
+
+		if (temp >= recdata->threshold &&
+		    recdata->coldest &&
+		    temp >= recdata->coldest->temperature) {
+			++recdata->refcount;
+			hf_delete(recdata, cnode->c_fileid, temp);
+			--recdata->refcount;
+		}
+	}
+
+	lck_mtx_unlock(&hfsmp->hfc_mutex);
+	return (0);
+}
+
+/*
+ * Notification that a file is being deleted.
+ *
+ * As compiled (#if 1) this is a stub that ignores its argument and returns 0.
+ * The #else branch is kept for reference only: it would open the hot file
+ * B-tree, look up the file's thread record, delete it through
+ * hfc_btree_delete_record() and drop the file from the in-memory recording
+ * list.  The comment inside explains why that eager cleanup is not used.
+ */
+int
+hfs_hotfile_deleted(__unused struct vnode *vp)
+{
+#if 1
+ return 0;
+#else
+ //
+ // XXXdbg - this code, while it would work, would introduce a huge inefficiency
+ // to deleting files as the way it's written would require us to open
+ // the hotfile btree on every open, delete two records in it and then
+ // close the hotfile btree (which involves more writes).
+ //
+ // We actually can be lazy about deleting hotfile records for files
+ // that get deleted. When it's time to evict things, if we encounter
+ // a record that references a dead file (i.e. a fileid which no
+ // longer exists), the eviction code will remove the records. Likewise
+ // the code that scans the HotFile B-Tree at boot time to re-pin files
+ // will remove dead records.
+ //
+
+ hotfile_data_t *hotdata;
+ hfsmount_t *hfsmp;
+ cnode_t *cp;
+ filefork_t *filefork;
+ u_int32_t temperature;
+ BTreeIterator * iterator = NULL;
+ FSBufferDescriptor record;
+ HotFileKey *key;
+ u_int32_t data;
+ int error=0;
+
+ // only files that carry the pinned flag have hot file records
+ cp = VTOC(vp);
+ if (cp == NULL || !(cp->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+ return 0;
+ }
+
+ hfsmp = VTOHFS(vp);
+ if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+ return 0;
+ }
+
+ if (hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) != 0 || hfsmp->hfc_filevp == NULL) {
+ // either there is no hotfile info or it's damaged
+ return EINVAL;
+ }
+
+ // NOTE(review): this early return happens after a successful
+ // hfc_btree_open() without a matching hfc_btree_close() - revisit
+ // if this branch is ever re-enabled.
+ filefork = VTOF(hfsmp->hfc_filevp);
+ if (filefork == NULL) {
+ return 0;
+ }
+
+ iterator = hfs_mallocz(sizeof(*iterator));
+
+ key = (HotFileKey*) &iterator->key;
+
+ record.bufferAddress = &data;
+ record.itemSize = sizeof(u_int32_t);
+ record.itemCount = 1;
+
+ // search key for the file's thread record
+ key->keyLength = HFC_KEYLENGTH;
+ key->temperature = HFC_LOOKUPTAG;
+ key->fileID = cp->c_fileid;
+ key->forkType = 0;
+
+ lck_mtx_lock(&hfsmp->hfc_mutex);
+ (void) BTInvalidateHint(iterator);
+ if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) == 0) {
+ // 'temperature' is assigned here but not read again in this function
+ temperature = key->temperature;
+ hfc_btree_delete_record(hfsmp, iterator, key);
+ } else {
+ //printf("hfs: hotfile_deleted: did not find fileid %d\n", cp->c_fileid);
+ error = ENOENT;
+ }
+
+ if ((hotdata = hfsmp->hfc_recdata) != NULL) {
+ // just in case, also make sure it's removed from the in-memory list as well
+ ++hotdata->refcount;
+ hf_delete(hotdata, cp->c_fileid, cp->c_fileid);
+ --hotdata->refcount;
+ }
+
+ lck_mtx_unlock(&hfsmp->hfc_mutex);
+ hfs_free(iterator, sizeof(*iterator));
+
+ hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+
+ return error;
+#endif
+}
+
+/*
+ * Record a change in the number of blocks used by a pinned hot file.
+ *
+ *   vp          vnode whose block count changed; NULL is tolerated
+ *   num_blocks  signed block delta, added to hfs_hotfile_blk_adjust
+ *
+ * The delta is accumulated atomically in hfsmp->hfs_hotfile_blk_adjust.
+ * Nothing is recorded when the volume does not pin hot files, when the delta
+ * is zero, or when the file is not pinned / is marked do-not-pin.
+ *
+ * NOTE(review): num_blocks is 64 bits wide while the accumulator is read
+ * into a plain int elsewhere in this file - confirm that deltas always fit.
+ *
+ * Always returns 0.
+ */
+int
+hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks)
+{
+	hfsmount_t *hfsmp;
+
+	if (vp == NULL) {
+		return 0;
+	}
+
+	hfsmp = VTOHFS(vp);
+
+	/* vp is known to be non-NULL here, so only the volume type and the delta need checking */
+	if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || num_blocks == 0) {
+		return 0;
+	}
+
+	//
+	// if file is not HotFileCached or it has the CanNotHotFile cache
+	// bit set then there is nothing to do
+	//
+	if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) || (VTOC(vp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) {
+		// it's not a hot file or can't be one so don't bother tracking
+		return 0;
+	}
+
+	OSAddAtomic(num_blocks, &hfsmp->hfs_hotfile_blk_adjust);
+
+	return (0);
+}
+
+//
+// Return the current number of free hot file blocks, first folding any
+// pending adjustment (hfs_hotfile_blk_adjust) into hfs_hotfile_freeblks.
+// Returns 0 while hfc_stage is below HFC_IDLE.
+//
+// Assumes hfsmp->hfc_mutex is LOCKED
+//
+static int
+hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp)
+{
+	int pending;
+
+	if (hfsmp->hfc_stage < HFC_IDLE) {
+		return 0;
+	}
+
+	// snap a copy of this value, then take exactly that much back out of
+	// the shared counter so updates that arrive meanwhile are not lost
+	pending = hfsmp->hfs_hotfile_blk_adjust;
+	if (pending != 0) {
+		OSAddAtomic(-pending, &hfsmp->hfs_hotfile_blk_adjust);
+		hfsmp->hfs_hotfile_freeblks += pending;
+	}
+
+	return hfsmp->hfs_hotfile_freeblks;
+}
+
+
+/*
+ *========================================================================
+ * HOT FILE MAINTENANCE ROUTINES
+ *========================================================================
+ */
+
+/*
+ * Per-vnode callback used by hotfiles_collect(): offers every regular,
+ * non-system vnode to hfs_addhotfile_internal().  The result of that call
+ * is ignored; the callback always returns VNODE_RETURNED.
+ */
+static int
+hotfiles_collect_callback(struct vnode *vp, __unused void *cargs)
+{
+	if (!vnode_isreg(vp) || vnode_issystem(vp)) {
+		return (VNODE_RETURNED);
+	}
+
+	(void) hfs_addhotfile_internal(vp);
+
+	return (VNODE_RETURNED);
+}
+
+/*
+ * Add all active hot files to the recording list.
+ *
+ * The mount is marked busy (without waiting) for the duration of the walk;
+ * if that fails the walk is simply skipped.  Always returns 0.
+ */
+static int
+hotfiles_collect(struct hfsmount *hfsmp)
+{
+	struct mount *mp = HFSTOVFS(hfsmp);
+
+	if (vfs_busy(mp, LK_NOWAIT) == 0) {
+		/*
+		 * hotfiles_collect_callback will be called for each vnode
+		 * hung off of this mount point
+		 * the vnode will be
+		 * properly referenced and unreferenced around the callback
+		 */
+		vnode_iterate(mp, 0, hotfiles_collect_callback, (void *)NULL);
+
+		vfs_unbusy(mp);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Update the data of a btree record
+ * This is called from within BTUpdateRecord.
+ *
+ * Only thread records (temperature == HFC_LOOKUPTAG) are modified: their
+ * data word is overwritten with the value '*state' points at.  Any other
+ * record is left untouched.  Always returns 0.
+ */
+static int
+update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state)
+{
+	if (key->temperature != HFC_LOOKUPTAG) {
+		return (0);
+	}
+
+	*data = *state;
+	return (0);
+}
+
+/*
+ * Identify files already in hot area.
+ *
+ * For every entry of hfsmp->hfc_filelist that already has a thread record
+ * in the hot file B-tree, the thread record is updated with the entry's
+ * latest temperature, the hot file record is re-keyed from the old to the
+ * new temperature (delete + insert), and the list entry is invalidated
+ * (temperature set to 0, its blocks subtracted from hfl_totalblocks).
+ *
+ * Does nothing (returns 0) when there is no file list or on
+ * HFS_CS_HOTFILE_PIN volumes.
+ *
+ * The work runs inside a transaction with the hot file B-tree cnode locked
+ * exclusively.  Returns 0 on success, EINVAL if the transaction cannot be
+ * started, EPERM if the lock cannot be taken, otherwise the converted error
+ * of the B-tree operation that ended the loop.
+ */
+static int
+hotfiles_refine(struct hfsmount *hfsmp)
+{
+	BTreeIterator * iterator = NULL;
+	filefork_t * filefork;
+	hotfilelist_t *listp;
+	FSBufferDescriptor record;
+	HotFileKey * key;
+	u_int32_t data;
+	int i;
+	int error = 0;
+
+	if ((listp = hfsmp->hfc_filelist) == NULL)
+		return (0);
+
+	if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+		// on ssd's we don't refine the temperature since the
+		// replacement algorithm is simply random
+		return 0;
+	}
+
+	iterator = hfs_mallocz(sizeof(*iterator));
+
+	key = (HotFileKey*) &iterator->key;
+
+	record.bufferAddress = &data;
+	record.itemSize = sizeof(u_int32_t);
+	record.itemCount = 1;
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+		error = EPERM;
+		goto out1;
+	}
+	filefork = VTOF(hfsmp->hfc_filevp);
+
+	for (i = 0; i < listp->hfl_count; ++i) {
+		/*
+		 * Check if entry (thread) is already in hot area.
+		 * On success 'data' holds the thread record's contents,
+		 * i.e. the temperature the file is currently keyed under.
+		 */
+		key->keyLength = HFC_KEYLENGTH;
+		key->temperature = HFC_LOOKUPTAG;
+		key->fileID = listp->hfl_hotfile[i].hf_fileid;
+		key->forkType = 0;
+		(void) BTInvalidateHint(iterator);
+		if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) != 0) {
+			continue; /* not in hot area, so skip */
+		}
+
+		/*
+		 * Update thread entry with latest temperature.
+		 * A failure here is logged but does not stop the loop.
+		 */
+		error = BTUpdateRecord(filefork, iterator,
+				(IterateCallBackProcPtr)update_callback,
+				&listp->hfl_hotfile[i].hf_temperature);
+		if (error) {
+			printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID);
+			error = MacToVFSError(error);
+			// break;
+		}
+		/*
+		 * Re-key entry with latest temperature.
+		 */
+		key->keyLength = HFC_KEYLENGTH;
+		key->temperature = data;
+		key->fileID = listp->hfl_hotfile[i].hf_fileid;
+		key->forkType = 0;
+		/* Pick up record data. */
+		(void) BTInvalidateHint(iterator);
+		(void) BTSearchRecord(filefork, iterator, &record, NULL, iterator);
+		error = BTDeleteRecord(filefork, iterator);
+		if (error) {
+			printf("hfs: hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID);
+			error = MacToVFSError(error);
+			break;
+		}
+		key->keyLength = HFC_KEYLENGTH;
+		key->temperature = listp->hfl_hotfile[i].hf_temperature;
+		key->fileID = listp->hfl_hotfile[i].hf_fileid;
+		key->forkType = 0;
+		error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+		if (error) {
+			printf("hfs: hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID);
+			error = MacToVFSError(error);
+			break;
+		}
+		/*
+		 * Invalidate this entry in the list.
+		 */
+		listp->hfl_hotfile[i].hf_temperature = 0;
+		listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+
+	} /* end for */
+
+	(void) BTFlushPath(filefork);
+	hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+out1:
+	hfs_end_transaction(hfsmp);
+out:
+	if (iterator)
+		hfs_free(iterator, sizeof(*iterator));
+	return (error);
+}
+
+/*
+ * Move new hot files into hot area.
+ *
+ * Walks the runtime hot file list (hfsmp->hfc_filelist) from hfl_next
+ * and, for each entry that passes the eligibility checks, either pins
+ * the file (HFS_CS_HOTFILE_PIN volumes) or relocates it to
+ * hfs_hotfile_start, then inserts a hot file record plus an
+ * HFC_LOOKUPTAG thread record into the hot files b-tree.  Each call
+ * handles a bounded window of list entries (HFC_FILESPERSYNC, widened
+ * by one for every file that fails to pin/relocate) and stops once
+ * about HFC_BLKSPERSYNC blocks have been moved.
+ *
+ * Returns 0 when there is no file list, EBUSY when the stage is not
+ * HFC_ADOPTION, EPERM when the b-tree cnode cannot be locked, and
+ * otherwise whatever error the adoption loop last recorded (0 if none).
+ *
+ * On exit hfc_stage is put back to HFC_ADOPTION, or set to HFC_IDLE
+ * (and the b-tree closed) when the list is exhausted, the hot file
+ * free space is used up, or a b-tree insert failed.  Threads sleeping
+ * on hfc_stage are woken.
+ *
+ * Requires that the hfc_mutex be held.
+ */
+static int
+hotfiles_adopt(struct hfsmount *hfsmp)
+{
+ BTreeIterator * iterator = NULL;
+ struct vnode *vp;
+ filefork_t * filefork;
+ hotfilelist_t *listp;
+ FSBufferDescriptor record;
+ HotFileKey * key;
+ u_int32_t data;
+ enum hfc_stage stage;
+ int fileblocks;
+ int blksmoved;
+ int i;
+ int last;
+ int error = 0;
+ int startedtrans = 0;
+ //
+ // all files in a given adoption phase have a temperature
+ // that starts at a random value and then increases linearly.
+ // the idea is that during eviction, files that were adopted
+ // together will be evicted together
+ //
+ long starting_temp = random() % HF_TEMP_RANGE;
+ long temp_adjust = 0;
+
+ if ((listp = hfsmp->hfc_filelist) == NULL)
+ return (0);
+
+ if (hfsmp->hfc_stage != HFC_ADOPTION) {
+ return (EBUSY);
+ }
+ if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+ return (EPERM);
+ }
+
+ iterator = hfs_mallocz(sizeof(*iterator));
+
+#if HFC_VERBOSE
+ printf("hfs:%s: hotfiles_adopt: (hfl_next: %d, hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+ hfsmp->vcbVN,
+ listp->hfl_next,
+ hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+ hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
+#endif
+
+ /*
+ * Remember the stage we entered with and mark the hot file state
+ * busy while we work; 'stage' (possibly updated below) is written
+ * back to hfc_stage on exit.
+ */
+ stage = hfsmp->hfc_stage;
+ hfsmp->hfc_stage = HFC_BUSY;
+
+ /* Only look at a bounded window of list entries per call. */
+ blksmoved = 0;
+ last = listp->hfl_next + HFC_FILESPERSYNC;
+ if (last > listp->hfl_count)
+ last = listp->hfl_count;
+
+ key = (HotFileKey*) &iterator->key;
+ key->keyLength = HFC_KEYLENGTH;
+
+ /* Every record inserted below carries the single u_int32_t in 'data'. */
+ record.bufferAddress = &data;
+ record.itemSize = sizeof(u_int32_t);
+ record.itemCount = 1;
+
+ filefork = VTOF(hfsmp->hfc_filevp);
+
+ for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) {
+ /*
+ * Skip entries that aren't going to work.
+ */
+ if (listp->hfl_hotfile[i].hf_temperature == 0) {
+ //printf("hfs: zero temp on file-id %d\n", listp->hfl_hotfile[i].hf_fileid);
+ listp->hfl_next++;
+ continue;
+ }
+ if (listp->hfl_hotfile[i].hf_fileid == VTOC(hfsmp->hfc_filevp)->c_fileid) {
+ //printf("hfs: cannot adopt the hotfile b-tree itself! (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid);
+ listp->hfl_next++;
+ continue;
+ }
+ if (listp->hfl_hotfile[i].hf_fileid < kHFSFirstUserCatalogNodeID) {
+ //printf("hfs: cannot adopt system files (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid);
+ listp->hfl_next++;
+ continue;
+ }
+
+ /*
+ * Acquire a vnode for this file.
+ */
+ error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0);
+ if (error) {
+ //printf("failed to get fileid %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error);
+ if (error == ENOENT) {
+ error = 0;
+ listp->hfl_next++;
+ continue; /* stale entry, go to next */
+ }
+ break;
+ }
+
+ //printf("hfs: examining hotfile entry w/fileid %d, temp %d, blocks %d (HotFileCached: %s)\n",
+ // listp->hfl_hotfile[i].hf_fileid, listp->hfl_hotfile[i].hf_temperature,
+ // listp->hfl_hotfile[i].hf_blocks,
+ // (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) ? "YES" : "NO");
+
+ if (!vnode_isreg(vp)) {
+ /* Symlinks are ineligible for adoption into the hotfile zone. */
+ //printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid);
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ listp->hfl_hotfile[i].hf_temperature = 0;
+ listp->hfl_next++;
+ /*
+ * The entry is being dropped from the list, so its blocks no
+ * longer count toward the pending total (same bookkeeping as
+ * the other rejection paths below).
+ */
+ listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+ continue; /* stale entry, go to next */
+ }
+ /*
+ * Reject files that are deleted/gone, already have extents in the
+ * hot area (non-pinning volumes only), or are already pinned or
+ * flagged as never-pin.
+ */
+ if ( (VTOC(vp)->c_flag & (C_DELETED | C_NOEXISTS))
+ || (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && hotextents(hfsmp, &VTOF(vp)->ff_extents[0]))
+ || (VTOC(vp)->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask))) {
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ listp->hfl_hotfile[i].hf_temperature = 0;
+ listp->hfl_next++;
+ listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+ continue; /* stale entry, go to next */
+ }
+
+ fileblocks = VTOF(vp)->ff_blocks;
+
+ //
+ // for CF, if the file is empty (and not compressed) or it is too large,
+ // do not try to pin it. (note: if fileblocks == 0 but the file is marked
+ // as compressed, we may still be able to cache it).
+ //
+ if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) &&
+ ((fileblocks == 0 && !(VTOC(vp)->c_bsdflags & UF_COMPRESSED)) ||
+ (unsigned int)fileblocks > (HFC_MAXIMUM_FILESIZE / (uint64_t)HFSTOVCB(hfsmp)->blockSize))) {
+ // don't try to cache something too large or that's zero-bytes
+
+ vnode_clearfastdevicecandidate(vp); // turn off the fast-dev-candidate flag so we don't keep trying to cache it.
+
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ listp->hfl_hotfile[i].hf_temperature = 0;
+ listp->hfl_next++;
+ listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+ continue; /* entry is too big, just carry on with the next guy */
+ }
+
+ //
+ // If a file is not an autocandidate (i.e. it's a user-tagged file desirous of
+ // being hotfile cached) but it is already bigger than 4 megs, don't bother
+ // hotfile caching it. Note that if a user tagged file starts small, gets
+ // adopted and then grows over time we will allow it to grow bigger than 4 megs
+ // which is intentional for things like the Mail or Photos database files which
+ // grow slowly over time and benefit from being on the FastDevice.
+ //
+ if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) &&
+ !(VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) &&
+ (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) &&
+ (unsigned int)fileblocks > ((4*1024*1024) / (uint64_t)HFSTOVCB(hfsmp)->blockSize)) {
+
+ vnode_clearfastdevicecandidate(vp); // turn off the fast-dev-candidate flag so we don't keep trying to cache it.
+
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ listp->hfl_hotfile[i].hf_temperature = 0;
+ listp->hfl_next++;
+ listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks;
+ continue; /* entry is too big, just carry on with the next guy */
+ }
+
+ if (fileblocks > hfs_hotfile_cur_freeblks(hfsmp)) {
+ //
+ // No room for this file. Although eviction should have made space
+ // it's best that we check here as well since writes to existing
+ // hotfiles may have eaten up space since we performed eviction
+ //
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ listp->hfl_next++;
+ listp->hfl_totalblocks -= fileblocks;
+ continue; /* entry too big, go to next */
+ }
+
+ if ((blksmoved > 0) &&
+ (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
+ //
+ // we've done enough work, let's be nice to the system and
+ // stop until the next iteration
+ //
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ break; /* adopt this entry the next time around */
+ }
+
+ //
+ // The size of data for a hot file record is 4 bytes. The data
+ // stored in hot file record is not really meaningful. However
+ // to aid debugging, we store first four bytes of the file name
+ // or the ASCII text "????"
+ //
+ if (VTOC(vp)->c_desc.cd_nameptr && (VTOC(vp)->c_desc.cd_namelen > 0)) {
+ size_t max_len;
+
+ max_len = sizeof(u_int32_t);
+ if (max_len > (unsigned)VTOC(vp)->c_desc.cd_namelen)
+ max_len = VTOC(vp)->c_desc.cd_namelen;
+
+ memcpy(&data, VTOC(vp)->c_desc.cd_nameptr, max_len);
+ } else
+ data = 0x3f3f3f3f;
+
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ //
+ // For CF we pin the blocks belonging to the file
+ // to the "fast" (aka ssd) media
+ //
+ uint32_t pinned_blocks;
+
+ if (vnode_isautocandidate(vp)) {
+ VTOC(vp)->c_attr.ca_recflags |= kHFSAutoCandidateMask;
+ }
+ if (VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) {
+ //
+ // this moves auto-cached files to the higher tier
+ // of "temperatures" which means they are less likely
+ // to get evicted (user selected hotfiles will get
+ // evicted first in the theory that they change more
+ // frequently compared to system files)
+ //
+ temp_adjust = MAX_NORMAL_TEMP;
+ } else {
+ temp_adjust = 0;
+ }
+
+ hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this
+ hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+ error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pinned_blocks);
+
+ /* From here on account for what was actually pinned. */
+ fileblocks = pinned_blocks;
+
+ // go back to an exclusive lock since we're going to modify the cnode again
+ hfs_unlock(VTOC(vp));
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ } else {
+ //
+ // Old style hotfiles moves the data to the center (aka "hot")
+ // region of the disk
+ //
+ error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc());
+ }
+
+ if (!error) {
+ VTOC(vp)->c_attr.ca_recflags |= kHFSFastDevPinnedMask;
+ VTOC(vp)->c_flag |= C_MODIFIED;
+ } else if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && error == EALREADY) {
+ //
+ // If hfs_pin_vnode() returned EALREADY then this file is not
+ // ever able to be hotfile cached the normal way. This can
+ // happen with compressed files which have their data stored
+ // in an extended attribute. We flag them so that we won't
+ // bother to try and hotfile cache them again the next time
+ // they're read.
+ //
+ VTOC(vp)->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
+ VTOC(vp)->c_flag |= C_MODIFIED;
+ }
+
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ if (error) {
+#if HFC_VERBOSE
+ if (error != EALREADY) {
+ printf("hfs: hotfiles_adopt: could not relocate file %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error);
+ }
+#endif
+
+ /* This entry moved nothing, so widen the window by one entry. */
+ if (last < listp->hfl_count) {
+ last++;
+ }
+ /* Move on to next item. */
+ listp->hfl_next++;
+ continue;
+ }
+ /* Keep hot file free space current. */
+ hfsmp->hfs_hotfile_freeblks -= fileblocks;
+ listp->hfl_totalblocks -= fileblocks;
+
+ /* Insert hot file entry */
+ key->keyLength = HFC_KEYLENGTH;
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ //
+ // The "temperature" for a CF hotfile is simply a random
+ // number that we sequentially increment for each file in
+ // the set of files we're currently adopting. This has the
+ // nice property that all of the files we pin to the ssd
+ // in the current phase will sort together in the hotfile
+ // btree. When eviction time comes we will evict them
+ // together as well. This gives the eviction phase temporal
+ // locality - things written together get evicted together
+ // which is what ssd's like.
+ //
+ listp->hfl_hotfile[i].hf_temperature = (uint32_t)temp_adjust + starting_temp++;
+ }
+
+ key->temperature = listp->hfl_hotfile[i].hf_temperature;
+ key->fileID = listp->hfl_hotfile[i].hf_fileid;
+ key->forkType = 0;
+
+ /* Start a new transaction before calling BTree code. */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+ startedtrans = 1;
+
+ error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+ if (error) {
+ int orig_error = error;
+ error = MacToVFSError(error);
+ printf("hfs: hotfiles_adopt:1: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID);
+ /* Give up on adoption; HFC_IDLE makes the exit path close the b-tree. */
+ stage = HFC_IDLE;
+ break;
+ }
+
+ /* Insert thread record */
+ key->keyLength = HFC_KEYLENGTH;
+ key->temperature = HFC_LOOKUPTAG;
+ key->fileID = listp->hfl_hotfile[i].hf_fileid;
+ key->forkType = 0;
+ /* The thread record's payload is the file's temperature. */
+ data = listp->hfl_hotfile[i].hf_temperature;
+ error = BTInsertRecord(filefork, iterator, &record, record.itemSize);
+ if (error) {
+ int orig_error = error;
+ error = MacToVFSError(error);
+ printf("hfs: hotfiles_adopt:2: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID);
+ stage = HFC_IDLE;
+ break;
+ } else {
+ (void) BTFlushPath(filefork);
+ blksmoved += fileblocks;
+ }
+
+ /*
+ * Leaving here on the last entry keeps the transaction open so the
+ * code after the loop can save the b-tree user info inside it.
+ */
+ listp->hfl_next++;
+ if (listp->hfl_next >= listp->hfl_count) {
+ break;
+ }
+
+ /* Transaction complete. */
+ if (startedtrans) {
+ hfs_end_transaction(hfsmp);
+ startedtrans = 0;
+ }
+
+ if (hfs_hotfile_cur_freeblks(hfsmp) <= 0) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks);
+#endif
+ break;
+ }
+ } /* end for */
+
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d files left)\n", listp->hfl_next, blksmoved, listp->hfl_count - i);
+#endif
+ if (!startedtrans) {
+ // start a txn so we'll save the btree summary info
+ if (hfs_start_transaction(hfsmp) == 0) {
+ startedtrans = 1;
+ }
+ }
+
+ /* Finish any outstanding transactions. */
+ if (startedtrans) {
+ save_btree_user_info(hfsmp);
+
+ (void) BTFlushPath(filefork);
+ hfs_end_transaction(hfsmp);
+ startedtrans = 0;
+ }
+ hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+ if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_adopt: all done relocating %d files\n", listp->hfl_count);
+ printf("hfs: hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
+#endif
+ stage = HFC_IDLE;
+ }
+ hfs_free(iterator, sizeof(*iterator));
+
+ /* Anything other than "keep adopting" means we are done with the b-tree. */
+ if (stage != HFC_ADOPTION && hfsmp->hfc_filevp) {
+ (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp);
+ hfsmp->hfc_filevp = NULL;
+ }
+ hfsmp->hfc_stage = stage;
+ wakeup((caddr_t)&hfsmp->hfc_stage);
+ return (error);
+}
+
+/*
+ * Reclaim space by evicting the coldest files.
+ *
+ * Repeatedly takes a record from the hot files b-tree (starting with the
+ * first one) and, for that file, either un-pins it (HFS_CS_HOTFILE_PIN
+ * volumes) or relocates it to the volume's nextAllocation, then deletes
+ * both its hot file record and its HFC_LOOKUPTAG thread record.  The
+ * loop runs while hfl_reclaimblks is positive and the per-call limits
+ * (HFC_BLKSPERSYNC blocks, HFC_FILESPERSYNC files) have not been hit.
+ *
+ * Returns EBUSY when the stage is not HFC_EVICTION, 0 when there is no
+ * file list, EPERM when the b-tree cnode cannot be locked, and otherwise
+ * the error state left by the eviction loop (0 if none).
+ *
+ * On exit hfc_stage becomes HFC_ADOPTION when the b-tree ran out of
+ * hot file records or hfl_reclaimblks dropped to zero; otherwise it is
+ * restored to the stage seen on entry.  Threads sleeping on hfc_stage
+ * are woken.
+ *
+ * Requires that the hfc_mutex be held.
+ */
+static int
+hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
+{
+ BTreeIterator * iterator = NULL;
+ struct vnode *vp;
+ HotFileKey * key;
+ filefork_t * filefork;
+ hotfilelist_t *listp;
+ enum hfc_stage stage;
+ u_int32_t savedtemp;
+ int blksmoved;
+ int filesmoved;
+ int fileblocks;
+ int error = 0;
+ int startedtrans = 0;
+ int bt_op;
+
+ if (hfsmp->hfc_stage != HFC_EVICTION) {
+ return (EBUSY);
+ }
+
+ if ((listp = hfsmp->hfc_filelist) == NULL)
+ return (0);
+
+ if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+ return (EPERM);
+ }
+
+#if HFC_VERBOSE
+ printf("hfs:%s: hotfiles_evict (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n",
+ hfsmp->vcbVN,
+ hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end,
+ hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles);
+#endif
+
+ iterator = hfs_mallocz(sizeof(*iterator));
+
+ /*
+ * Remember the stage we entered with and mark the hot file state
+ * busy; 'stage' (possibly advanced below) is written back on exit.
+ */
+ stage = hfsmp->hfc_stage;
+ hfsmp->hfc_stage = HFC_BUSY;
+
+ filesmoved = blksmoved = 0;
+ /*
+ * Normally each pass re-reads the first record, because the previous
+ * pass deleted the old first record.  After a file fails to move,
+ * bt_op switches to kBTreeNextRecord (and stays there) so the record
+ * that was left in place is stepped over.
+ */
+ bt_op = kBTreeFirstRecord;
+
+ key = (HotFileKey*) &iterator->key;
+
+ filefork = VTOF(hfsmp->hfc_filevp);
+
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: reclaim blks %d\n", listp->hfl_reclaimblks);
+#endif
+
+ while (listp->hfl_reclaimblks > 0 &&
+ blksmoved < HFC_BLKSPERSYNC &&
+ filesmoved < HFC_FILESPERSYNC) {
+
+ /*
+ * Obtain the first record (ie the coldest one).
+ */
+ if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: no more records\n");
+#endif
+ error = 0;
+ stage = HFC_ADOPTION;
+ break;
+ }
+ if (key->keyLength != HFC_KEYLENGTH) {
+ printf("hfs: hotfiles_evict: invalid key length %d\n", key->keyLength);
+ error = EFTYPE;
+ break;
+ }
+ /* Reaching a thread record means no hot file records are left. */
+ if (key->temperature == HFC_LOOKUPTAG) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: ran into thread records\n");
+#endif
+ error = 0;
+ stage = HFC_ADOPTION;
+ break;
+ }
+
+ // Jump straight to delete for some files...
+ // (the hot files b-tree itself, the journal file, the journal info
+ // block, and anything below the first user catalog node ID)
+ if (key->fileID == VTOC(hfsmp->hfc_filevp)->c_fileid
+ || key->fileID == hfsmp->hfs_jnlfileid
+ || key->fileID == hfsmp->hfs_jnlinfoblkid
+ || key->fileID < kHFSFirstUserCatalogNodeID) {
+ goto delete;
+ }
+
+ /*
+ * Acquire the vnode for this file.
+ */
+ error = hfs_vget(hfsmp, key->fileID, &vp, 0, 0);
+ if (error) {
+ if (error == ENOENT) {
+ goto delete; /* stale entry, go to next */
+ } else {
+ printf("hfs: hotfiles_evict: err %d getting file %d\n",
+ error, key->fileID);
+ }
+ break;
+ }
+
+ /*
+ * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck
+ * here. We do not want to move them.
+ */
+ if (!vnode_isreg(vp)) {
+ //printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID);
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ goto delete; /* invalid entry, go to next */
+ }
+
+ /*
+ * Stop for this call if moving this file would push us past the
+ * per-call block budget (a first file is always allowed through).
+ */
+ fileblocks = VTOF(vp)->ff_blocks;
+ if ((blksmoved > 0) &&
+ (blksmoved + fileblocks) > HFC_BLKSPERSYNC) {
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ break;
+ }
+ /*
+ * Make sure file is in the hot area.
+ */
+ if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0]) && !(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID);
+#endif
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ goto delete; /* stale entry, go to next */
+ }
+
+ /*
+ * Relocate file out of hot area. On cooperative fusion (CF) that just
+ * means un-pinning the data from the ssd. For traditional hotfiles that means moving
+ * the file data out of the hot region of the disk.
+ */
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ uint32_t pinned_blocks;
+
+ hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this
+ hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+
+ error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &pinned_blocks);
+ /* From here on account for the block count reported by the un-pin. */
+ fileblocks = pinned_blocks;
+
+ if (!error) {
+ // go back to an exclusive lock since we're going to modify the cnode again
+ hfs_unlock(VTOC(vp));
+ hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ }
+ } else {
+ error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx));
+ }
+ if (error) {
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID);
+#endif
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+ /* Leave this record in the b-tree and step past it from now on. */
+ bt_op = kBTreeNextRecord;
+ goto next; /* go to next */
+ } else {
+ VTOC(vp)->c_attr.ca_recflags &= ~kHFSFastDevPinnedMask;
+ VTOC(vp)->c_flag |= C_MODIFIED;
+ }
+
+ //
+ // We do not believe that this call to hfs_fsync() is
+ // necessary and it causes a journal transaction
+ // deadlock so we are removing it.
+ //
+ // (void) hfs_fsync(vp, MNT_WAIT, 0, p);
+
+ hfs_unlock(VTOC(vp));
+ vnode_put(vp);
+
+ /* Credit the freed blocks and charge them against the reclaim target. */
+ hfsmp->hfs_hotfile_freeblks += fileblocks;
+ listp->hfl_reclaimblks -= fileblocks;
+ if (listp->hfl_reclaimblks < 0)
+ listp->hfl_reclaimblks = 0;
+ blksmoved += fileblocks;
+ filesmoved++;
+delete:
+ /* Start a new transaction before calling BTree code. */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ break;
+ }
+ startedtrans = 1;
+
+ /* Remove the hot file record the iterator is positioned on... */
+ error = BTDeleteRecord(filefork, iterator);
+ if (error) {
+ error = MacToVFSError(error);
+ break;
+ }
+ /*
+ * ...then re-key the same iterator to the file's thread record
+ * and remove that too, restoring the temperature afterwards.
+ */
+ savedtemp = key->temperature;
+ key->temperature = HFC_LOOKUPTAG;
+ error = BTDeleteRecord(filefork, iterator);
+ if (error) {
+ error = MacToVFSError(error);
+ break;
+ }
+ key->temperature = savedtemp;
+next:
+ (void) BTFlushPath(filefork);
+
+ /* Transaction complete. */
+ if (startedtrans) {
+ hfs_end_transaction(hfsmp);
+ startedtrans = 0;
+ }
+
+ } /* end while */
+
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks);
+#endif
+ /*
+ * Finish any outstanding transactions.  A transaction is only still
+ * open here when the loop was left by a break after starting one.
+ */
+ if (startedtrans) {
+ save_btree_user_info(hfsmp);
+
+ (void) BTFlushPath(filefork);
+ hfs_end_transaction(hfsmp);
+ startedtrans = 0;
+ }
+ hfs_unlock(VTOC(hfsmp->hfc_filevp));
+
+ /*
+ * Move to next stage when finished.
+ */
+ if (listp->hfl_reclaimblks <= 0) {
+ stage = HFC_ADOPTION;
+#if HFC_VERBOSE
+ printf("hfs: hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks);
+#endif
+ }
+ hfs_free(iterator, sizeof(*iterator));
+ hfsmp->hfc_stage = stage;
+ wakeup((caddr_t)&hfsmp->hfc_stage);
+ return (error);
+}
+
+/*
+ * Age the existing records in the hot files b-tree.
+ *
+ * Does nothing (returns 0) on HFS_CS_HOTFILE_PIN volumes.  Otherwise it
+ * walks the leaf records in order from the first one and, for each
+ * record visited, deletes it and re-inserts it with half its
+ * temperature (never below 4), then updates the file's HFC_LOOKUPTAG
+ * thread record with the new temperature via update_callback.  At most
+ * (numRecords / 2) - 1 records are aged, and the walk stops early if it
+ * reaches a thread record.
+ *
+ * All b-tree changes are made inside one transaction with the b-tree
+ * cnode locked exclusively.
+ *
+ * Returns 0 on success, EINVAL if the transaction cannot be started,
+ * EPERM if the cnode lock fails, EFTYPE when a record looks malformed
+ * or out of order, or a b-tree error translated by MacToVFSError().
+ */
+static int
+hotfiles_age(struct hfsmount *hfsmp)
+{
+ BTreeInfoRec btinfo;
+ BTreeIterator * iterator = NULL;
+ BTreeIterator * prev_iterator;
+ FSBufferDescriptor record;
+ FSBufferDescriptor prev_record;
+ HotFileKey * key;
+ HotFileKey * prev_key;
+ filefork_t * filefork;
+ u_int32_t data;
+ u_int32_t prev_data;
+ u_int32_t newtemp;
+ int error;
+ int i;
+ int numrecs;
+ int aged = 0;
+ u_int16_t reclen;
+
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ //
+ // hotfiles don't age on CF
+ //
+ return 0;
+ }
+
+ /*
+ * One allocation holds two iterators: iterator[0] tracks the record
+ * just read, iterator[1] (prev_iterator) the record before it, which
+ * is the one that actually gets aged on each pass.
+ */
+ iterator = hfs_mallocz(2 * sizeof(*iterator));
+
+ key = (HotFileKey*) &iterator->key;
+
+ prev_iterator = &iterator[1];
+ prev_key = (HotFileKey*) &prev_iterator->key;
+
+ record.bufferAddress = &data;
+ record.itemSize = sizeof(data);
+ record.itemCount = 1;
+ prev_record.bufferAddress = &prev_data;
+ prev_record.itemSize = sizeof(prev_data);
+ prev_record.itemCount = 1;
+
+ /*
+ * Capture b-tree changes inside a transaction
+ */
+ if (hfs_start_transaction(hfsmp) != 0) {
+ error = EINVAL;
+ goto out2;
+ }
+ if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
+ error = EPERM;
+ goto out1;
+ }
+ filefork = VTOF(hfsmp->hfc_filevp);
+
+ error = BTGetInformation(filefork, 0, &btinfo);
+ if (error) {
+ error = MacToVFSError(error);
+ goto out;
+ }
+ /* Fewer than two records: nothing to age. */
+ if (btinfo.numRecords < 2) {
+ error = 0;
+ goto out;
+ }
+
+ /* Only want 1st half of leaf records */
+ /* (note: this also halves btinfo.numRecords itself) */
+ numrecs = (btinfo.numRecords /= 2) - 1;
+
+ error = BTIterateRecord(filefork, kBTreeFirstRecord, iterator, &record, &reclen);
+ if (error) {
+ printf("hfs_agehotfiles: BTIterateRecord: %d\n", error);
+ error = MacToVFSError(error);
+ goto out;
+ }
+ /* Prime "previous" with the first record. */
+ bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
+ prev_data = data;
+
+ for (i = 0; i < numrecs; ++i) {
+ /*
+ * Read ahead to the next record before touching the previous
+ * one, and sanity check what was read.
+ */
+ error = BTIterateRecord(filefork, kBTreeNextRecord, iterator, &record, &reclen);
+ if (error == 0) {
+ if (key->temperature < prev_key->temperature) {
+ printf("hfs_agehotfiles: out of order keys!\n");
+ error = EFTYPE;
+ break;
+ }
+ if (reclen != sizeof(data)) {
+ printf("hfs_agehotfiles: invalid record length %d\n", reclen);
+ error = EFTYPE;
+ break;
+ }
+ if (key->keyLength != HFC_KEYLENGTH) {
+ printf("hfs_agehotfiles: invalid key length %d\n", key->keyLength);
+ error = EFTYPE;
+ break;
+ }
+ } else if ((error == fsBTEndOfIterationErr || error == fsBTRecordNotFoundErr) &&
+ (i == (numrecs - 1))) {
+ /*
+ * Running off the end on the final pass is not an error; the
+ * previous record is still aged below.
+ */
+ error = 0;
+ } else if (error) {
+ printf("hfs_agehotfiles: %d of %d BTIterateRecord: %d\n", i, numrecs, error);
+ error = MacToVFSError(error);
+ break;
+ }
+ if (prev_key->temperature == HFC_LOOKUPTAG) {
+#if HFC_VERBOSE
+ printf("hfs_agehotfiles: ran into thread record\n");
+#endif
+ error = 0;
+ break;
+ }
+ /*
+ * The temperature is part of the key, so aging means deleting
+ * the record and inserting it again under the new key.
+ */
+ error = BTDeleteRecord(filefork, prev_iterator);
+ if (error) {
+ printf("hfs_agehotfiles: BTDeleteRecord failed %d (file %d)\n", error, prev_key->fileID);
+ error = MacToVFSError(error);
+ break;
+ }
+
+ /* Age by halving the temperature (floor = 4) */
+ newtemp = MAX(prev_key->temperature >> 1, 4);
+ prev_key->temperature = newtemp;
+
+ error = BTInsertRecord(filefork, prev_iterator, &prev_record, prev_record.itemSize);
+ if (error) {
+ printf("hfs_agehotfiles: BTInsertRecord failed %d (file %d)\n", error, prev_key->fileID);
+ error = MacToVFSError(error);
+ break;
+ }
+ ++aged;
+ /*
+ * Update thread entry with latest temperature.
+ */
+ prev_key->temperature = HFC_LOOKUPTAG;
+ error = BTUpdateRecord(filefork, prev_iterator,
+ (IterateCallBackProcPtr)update_callback,
+ &newtemp);
+ if (error) {
+ /*
+ * A failed thread record update is logged but does not stop
+ * the loop (the break below is commented out).
+ */
+ printf("hfs_agehotfiles: %d of %d BTUpdateRecord failed %d (file %d, %d)\n",
+ i, numrecs, error, prev_key->fileID, newtemp);
+ error = MacToVFSError(error);
+ // break;
+ }
+
+ /* The record read at the top of this pass becomes "previous". */
+ bcopy(iterator, prev_iterator, sizeof(BTreeIterator));
+ prev_data = data;
+
+ } /* end for */
+
+#if HFC_VERBOSE
+ if (error == 0)
+ printf("hfs_agehotfiles: aged %d records out of %d\n", aged, btinfo.numRecords);
+#endif
+ (void) BTFlushPath(filefork);
+ /* Unwind in reverse order of acquisition: lock, transaction, memory. */
+out:
+ hfs_unlock(VTOC(hfsmp->hfc_filevp));
+out1:
+ hfs_end_transaction(hfsmp);
+out2:
+ if (iterator)
+ hfs_free(iterator, 2 * sizeof(*iterator));
+ return (error);
+}
+
+/*
+ * Report whether a file's extents touch the hot file region.
+ *
+ * Examines up to kHFSPlusExtentDensity descriptors, stopping at the
+ * first one whose startBlock is 0.  Returns 1 as soon as an extent
+ * either lies entirely between hfs_hotfile_start and hfs_hotfile_end
+ * (inclusive) or begins before hfs_hotfile_end and runs past it;
+ * returns 0 if no examined extent matches.
+ */
+static int
+hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents)
+{
+	int idx;
+
+	for (idx = 0; idx < kHFSPlusExtentDensity; ++idx) {
+		u_int32_t first = extents[idx].startBlock;
+		u_int32_t last;
+
+		/* An extent starting at block 0 ends the scan. */
+		if (first == 0)
+			return (0);
+
+		last = first + extents[idx].blockCount - 1;
+
+		/* Extent sits wholly inside the hot region. */
+		if (first >= hfsmp->hfs_hotfile_start &&
+		    last <= hfsmp->hfs_hotfile_end)
+			return (1);
+
+		/* Extent straddles the end of the hot region. */
+		if (first < hfsmp->hfs_hotfile_end &&
+		    last > hfsmp->hfs_hotfile_end)
+			return (1);
+	}
+	return (0);
+}
+
+
+/*
+ *========================================================================
+ * HOT FILE B-TREE ROUTINES
+ *========================================================================
+ */
+
+/*
+ * Open the hot files b-tree for writing.
+ *
+ * Thin front end for hfc_btree_open_ext() that passes 0 for
+ * ignore_btree_errs, so b-tree open errors are reported to the caller.
+ *
+ * On successful exit the vnode has a reference but not an iocount.
+ */
+static int
+hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
+{
+	const int ignore_btree_errs = 0;
+
+	return hfc_btree_open_ext(hfsmp, vpp, ignore_btree_errs);
+}
+
+/*
+ * Open the hot files b-tree, optionally tolerating b-tree open errors.
+ *
+ * Looks up HFC_FILENAME in the root directory, obtains a vnode for it
+ * with CD_ISMETA set on the descriptor, and opens it as a b-tree using
+ * hfc_comparekeys.  When ignore_btree_errs is non-zero a BTOpenPath()
+ * failure is treated as success.
+ *
+ * On success *vpp is set to the vnode, which then carries a reference
+ * (vnode_ref) but no iocount.  *vpp is NULL on failure.
+ *
+ * On HFS_CS_HOTFILE_PIN volumes a successful open also seeds
+ * hfs_hotfile_freeblks from the b-tree's user data record when that
+ * field is still 0 and the record's magic matches HFC_MAGIC.
+ *
+ * Returns 0 on success; otherwise the cat_lookup() or
+ * hfs_getnewvnode() error, EBUSY if the vnode is still not a system
+ * vnode after one retry, or the BTOpenPath() error translated by
+ * MacToVFSError().
+ */
+static int
+hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs)
+{
+ proc_t p;
+ struct vnode *vp;
+ struct cat_desc cdesc;
+ struct cat_attr cattr;
+ struct cat_fork cfork;
+ static char filename[] = HFC_FILENAME;
+ int error;
+ int retry = 0;
+ int lockflags;
+ int newvnode_flags = 0;
+
+ *vpp = NULL;
+ p = current_proc();
+
+ /* Describe the b-tree file: HFC_FILENAME in the root directory. */
+ bzero(&cdesc, sizeof(cdesc));
+ cdesc.cd_parentcnid = kRootDirID;
+ cdesc.cd_nameptr = (const u_int8_t *)filename;
+ cdesc.cd_namelen = strlen(filename);
+
+ /* The catalog lookup is done under a shared catalog lock. */
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+ error = cat_lookup(hfsmp, &cdesc, 0, 0, &cdesc, &cattr, &cfork, NULL);
+
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ if (error) {
+ printf("hfs: hfc_btree_open: cat_lookup error %d\n", error);
+ return (error);
+ }
+again:
+ cdesc.cd_flags |= CD_ISMETA;
+ error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr,
+ &cfork, &vp, &newvnode_flags);
+ if (error) {
+ printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error);
+ cat_releasedesc(&cdesc);
+ return (error);
+ }
+ /*
+ * If the vnode we got is not a system vnode, recycle it and try
+ * once more; give up with EBUSY if the second attempt is no better.
+ */
+ if (!vnode_issystem(vp)) {
+#if HFC_VERBOSE
+ printf("hfs: hfc_btree_open: file has UBC, try again\n");
+#endif
+ hfs_unlock(VTOC(vp));
+ vnode_recycle(vp);
+ vnode_put(vp);
+ if (retry++ == 0)
+ goto again;
+ else
+ return (EBUSY);
+ }
+
+ /* Open the B-tree file for writing... */
+ error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys);
+ if (error) {
+ if (!ignore_btree_errs) {
+ printf("hfs: hfc_btree_open: BTOpenPath error %d; filesize %lld\n", error, VTOF(vp)->ff_size);
+ error = MacToVFSError(error);
+ } else {
+ error = 0;
+ }
+ }
+
+ /*
+ * Trade the iocount for a long-lived reference: on success take a
+ * vnode_ref() before dropping the iocount with vnode_put().
+ */
+ hfs_unlock(VTOC(vp));
+ if (error == 0) {
+ *vpp = vp;
+ vnode_ref(vp); /* keep a reference while its open */
+ }
+ vnode_put(vp);
+
+ /* NOTE(review): on the error path vp is examined here after its only iocount was dropped -- confirm this is safe. */
+ if (!vnode_issystem(vp))
+ panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp);
+
+ HotFilesInfo hotfileinfo;
+
+ if (error == 0 && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
+ if ((BTGetUserData(VTOF(vp), &hotfileinfo, sizeof(hotfileinfo)) == 0) && (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC)) {
+ /* Only seed the free block count if nothing has set it yet. */
+ if (hfsmp->hfs_hotfile_freeblks == 0) {
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks);
+ }
+
+ hfs_hotfile_cur_freeblks(hfsmp); // factors in any adjustments that happened at run-time
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Close the hot files b-tree.
+ *
+ * Flushes the journal when the volume has one, then -- provided an
+ * iocount can be obtained on vp -- syncs the file, closes the b-tree
+ * path, drops the reference, and recycles the vnode.
+ *
+ * On entry the vnode has a reference.
+ *
+ * Returns 0 when vnode_get() fails (nothing else is done in that
+ * case), the hfs_lock() error if the cnode cannot be locked, and
+ * otherwise the result of BTClosePath().
+ */
+static int
+hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
+{
+	proc_t p = current_proc();
+	int result = 0;
+
+	if (hfsmp->jnl) {
+		hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
+	}
+
+	/* No iocount, no teardown. */
+	if (vnode_get(vp) != 0) {
+		return (result);
+	}
+
+	result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+	if (result == 0) {
+		(void) hfs_fsync(vp, MNT_WAIT, 0, p);
+		result = BTClosePath(VTOF(vp));
+		hfs_unlock(VTOC(vp));
+	}
+
+	/* Drop the reference and iocount even if the lock could not be taken. */
+	vnode_rele(vp);
+	vnode_recycle(vp);
+	vnode_put(vp);
+
+	return (result);
+}
+
+//
+// Delete a hot file record and its HFC_LOOKUPTAG thread record from the
+// hot files b-tree, inside a single transaction.
+//
+// Assumes that hfsmp->hfc_filevp points to the hotfile btree vnode
+// (i.e. you called hfc_btree_open() ahead of time).
+// Presumably 'key' aliases the key embedded in 'iterator', since the
+// second delete is steered by rewriting key->temperature -- confirm
+// against callers.
+//
+// Returns EINVAL if the transaction cannot be started, the
+// MacToVFSError() translation of a failed delete, or 0.  If the first
+// delete fails the thread record is left alone; key->temperature is
+// always restored to its value on entry.
+//
+static int
+hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key)
+{
+	filefork_t *hf_fork = VTOF(hfsmp->hfc_filevp);
+	int result;
+
+	/* B-tree changes have to happen inside a transaction. */
+	if (hfs_start_transaction(hfsmp) != 0) {
+		return EINVAL;
+	}
+
+	result = BTDeleteRecord(hf_fork, iterator);
+	if (result == 0) {
+		/* Re-key to the thread record, delete it, then put the key back. */
+		int hottemp = key->temperature;
+
+		key->temperature = HFC_LOOKUPTAG;
+		result = BTDeleteRecord(hf_fork, iterator);
+		if (result != 0) {
+			result = MacToVFSError(result);
+			printf("hfs:2: failed to delete record for file-id %d : err %d\n", key->fileID, result);
+		}
+		key->temperature = hottemp;
+
+		(void) BTFlushPath(hf_fork);
+	} else {
+		result = MacToVFSError(result);
+		printf("hfs: failed to delete record for file-id %d : err %d\n", key->fileID, result);
+	}
+
+	/* Transaction complete. */
+	hfs_end_transaction(hfsmp);
+
+	return result;
+}
+
+//
+// Remove the hot files b-tree file (HFC_FILENAME) from the root
+// directory by calling hfs_vnop_remove() directly inside a transaction.
+//
+// You have to have already opened the hotfile btree so
+// that hfsmp->hfc_filevp is filled in.
+//
+// Returns the hfs_vfs_root() error if the root vnode cannot be
+// obtained.
+// NOTE(review): every later outcome -- a transaction that cannot be
+// started, or a failed hfs_vnop_remove() (which is only logged) --
+// still returns 0.  Confirm callers rely on that before changing this
+// to propagate 'error'.
+//
+static int
+hfc_btree_delete(struct hfsmount *hfsmp)
+{
+	struct vnode *dvp = NULL;
+	vfs_context_t ctx = vfs_context_current();
+	static char filename[] = HFC_FILENAME;
+	int error;
+
+	error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx);
+	if (error) {
+		return (error);
+	}
+
+	/* Name the file for the remove: a DELETE of the last path component. */
+	struct componentname cname = {
+		.cn_nameiop = DELETE,
+		.cn_flags = ISLASTCN,
+		.cn_pnbuf = filename,
+		.cn_pnlen = sizeof(filename),
+		.cn_nameptr = filename,
+		.cn_namelen = strlen(filename),
+	};
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Call the HFS remove vnop directly on the open b-tree vnode. */
+	struct vnop_remove_args ap = {
+		.a_dvp = dvp,
+		.a_vp = hfsmp->hfc_filevp,
+		.a_cnp = &cname,
+	};
+
+	error = hfs_vnop_remove(&ap);
+	if (error) {
+		printf("hfs: error %d removing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
+	}
+
+	hfs_end_transaction(hfsmp);
+
+out:
+	/* Release the iocount on the root directory obtained above. */
+	if (dvp) {
+		vnode_put(dvp);
+		dvp = NULL;
+	}
+
+	return 0;
+}
+
+
+
+
+/*
+ * Create a hot files btree file.
+ *
+ */
+static int
+hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int entries)
+{
+ struct vnode *dvp = NULL;
+ struct vnode *vp = NULL;
+ struct cnode *cp = NULL;
+ vfs_context_t ctx = vfs_context_current();
+ struct vnode_attr va;
+ static char filename[] = HFC_FILENAME;
+ int error;
+
+ if (hfsmp->hfc_filevp)
+ panic("hfs: hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp);
+
+ error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx);
+ if (error) {
+ return (error);
+ }
+
+ struct componentname cname = {
+ .cn_nameiop = CREATE,
+ .cn_flags = ISLASTCN,
+ .cn_pnbuf = filename,
+ .cn_pnlen = sizeof(filename),
+ .cn_nameptr = filename,
+ .cn_namelen = strlen(filename)
+ };
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_type, VREG);
+ VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR);
+ VATTR_SET(&va, va_uid, 0);
+ VATTR_SET(&va, va_gid, 0);
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ vnode_put(dvp);
+ return EINVAL;
+ }
+
+ /* call ourselves directly, ignore the higher-level VFS file creation code */
+
+ struct vnop_create_args ap = {
+ .a_dvp = dvp,
+ .a_vpp = &vp,
+ .a_cnp = &cname,
+ .a_vap = &va
+ };
+
+ error = hfs_vnop_create(&ap);
+ if (error) {
+ printf("hfs: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
+ goto out;
+ }
+ if (dvp) {
+ vnode_put(dvp);
+ dvp = NULL;
+ }
+ if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ goto out;
+ }
+ cp = VTOC(vp);
+
+ /* Don't use non-regular files or files with links. */
+ if (!vnode_isreg(vp) || cp->c_linkcount != 1) {
+ error = EFTYPE;
+ goto out;
+ }
+
+ printf("hfs: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN);
+
+ if (VTOF(vp)->ff_size < nodesize) {
+ caddr_t buffer;
+ u_int16_t *index;
+ u_int16_t offset;
+ BTNodeDescriptor *ndp;
+ BTHeaderRec *bthp;
+ HotFilesInfo *hotfileinfo;
+ int nodecnt;
+ int filesize;
+ int entirespernode;
+
+ /*
+ * Mark it invisible (truncate will pull these changes).
+ */
+ ((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |=
+ SWAP_BE16 (kIsInvisible + kNameLocked);
+
+ buffer = hfs_mallocz(nodesize);
+ index = (u_int16_t *)buffer;
+
+ entirespernode = (nodesize - sizeof(BTNodeDescriptor) - 2) /
+ (sizeof(HotFileKey) + 6);
+ nodecnt = 2 + howmany(entries * 2, entirespernode);
+ nodecnt = roundup(nodecnt, 8);
+ filesize = nodecnt * nodesize;
+
+ /* FILL IN THE NODE DESCRIPTOR: */
+ ndp = (BTNodeDescriptor *)buffer;
+ ndp->kind = kBTHeaderNode;
+ ndp->numRecords = SWAP_BE16 (3);
+ offset = sizeof(BTNodeDescriptor);
+ index[(nodesize / 2) - 1] = SWAP_BE16 (offset);
+
+ /* FILL IN THE HEADER RECORD: */
+ bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset);
+ bthp->nodeSize = SWAP_BE16 (nodesize);
+ bthp->totalNodes = SWAP_BE32 (filesize / nodesize);
+ bthp->freeNodes = SWAP_BE32 (nodecnt - 1);
+ bthp->clumpSize = SWAP_BE32 (filesize);
+ bthp->btreeType = kUserBTreeType; /* non-metadata */
+ bthp->attributes |= SWAP_BE32 (kBTBigKeysMask);
+ bthp->maxKeyLength = SWAP_BE16 (HFC_KEYLENGTH);
+ offset += sizeof(BTHeaderRec);
+ index[(nodesize / 2) - 2] = SWAP_BE16 (offset);
+
+ /* FILL IN THE USER RECORD: */
+ hotfileinfo = (HotFilesInfo *)((u_int8_t *)buffer + offset);
+ hotfileinfo->magic = SWAP_BE32 (HFC_MAGIC);
+ hotfileinfo->version = SWAP_BE32 (HFC_VERSION);
+ hotfileinfo->duration = SWAP_BE32 (HFC_DEFAULT_DURATION);
+ hotfileinfo->timebase = 0;
+ hotfileinfo->timeleft = 0;
+ hotfileinfo->threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE);
+ hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize);
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ if (hfsmp->hfs_hotfile_freeblks == 0) {
+ hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks;
+ }
+ hotfileinfo->usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks);
+ } else {
+ hotfileinfo->maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT);
+ }
+ strlcpy((char *)hotfileinfo->tag, hfc_tag,
+ sizeof hotfileinfo->tag);
+ offset += kBTreeHeaderUserBytes;
+ index[(nodesize / 2) - 3] = SWAP_BE16 (offset);
+
+ /* FILL IN THE MAP RECORD (only one node in use). */
+ *((u_int8_t *)buffer + offset) = 0x80;
+ offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec)
+ - kBTreeHeaderUserBytes - (4 * sizeof(int16_t));
+ index[(nodesize / 2) - 4] = SWAP_BE16 (offset);
+
+ vnode_setnoflush(vp);
+ error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, ctx);
+ if (error) {
+ printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
+ goto out;
+ }
+ cp->c_flag |= C_ZFWANTSYNC;
+ cp->c_zftimeout = 1;
+
+ if (error == 0) {
+ struct vnop_write_args args;
+ uio_t auio;
+
+ auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE);
+ uio_addiov(auio, (uintptr_t)buffer, nodesize);
+
+ args.a_desc = &vnop_write_desc;
+ args.a_vp = vp;
+ args.a_uio = auio;
+ args.a_ioflag = 0;
+ args.a_context = ctx;
+
+ hfs_unlock(cp);
+ cp = NULL;
+
+ error = hfs_vnop_write(&args);
+ if (error)
+ printf("hfs: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN);
+
+ uio_free(auio);
+ }
+ hfs_free(buffer, nodesize);
+ }
+out:
+ hfs_end_transaction(hfsmp);
+ if (dvp) {
+ vnode_put(dvp);
+ }
+ if (vp) {
+ if (cp)
+ hfs_unlock(cp);
+ vnode_recycle(vp);
+ vnode_put(vp);
+ }
+ return (error);
+}
+
+/*
+ * Compare two hot file b-tree keys.
+ *
+ * Keys are ordered by temperature first, then by file id, then by
+ * fork type; the first field that differs decides the result.
+ *
+ * Result:	 1	search key > trial key
+ *		 0	search key = trial key
+ *		-1	search key < trial key
+ */
+static int
+hfc_comparekeys(HotFileKey *searchKey, HotFileKey *trialKey)
+{
+	/* Compare temperatures first. */
+	if (searchKey->temperature != trialKey->temperature)
+		return ((searchKey->temperature > trialKey->temperature) ? 1 : -1);
+
+	/* Temperatures are equal, so the file ids decide. */
+	if (searchKey->fileID != trialKey->fileID)
+		return ((searchKey->fileID > trialKey->fileID) ? 1 : -1);
+
+	/* File ids are equal as well, so the fork types decide. */
+	if (searchKey->forkType != trialKey->forkType)
+		return ((searchKey->forkType > trialKey->forkType) ? 1 : -1);
+
+	return (0);
+}
+
+
+/*
+ *========================================================================
+ * HOT FILE DATA COLLECTING ROUTINES
+ *========================================================================
+ */
+
+/*
+ * Lookup a hot file entry in the tree.
+ *
+ * The tree is ordered by temperature (colder to the left, hotter to
+ * the right); entries with equal temperature are ordered by file id.
+ *
+ * Returns the entry whose temperature AND file id both match, or
+ * NULL when the search runs off the tree without finding one.
+ */
+#if HFC_DEBUG
+static hotfile_entry_t *
+hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
+{
+	hotfile_entry_t *entry = hotdata->rootentry;
+
+	/*
+	 * Keep descending until both halves of the key match.  Stopping
+	 * as soon as only one of them matches would return an unrelated
+	 * entry that merely shares a temperature or a file id.
+	 */
+	while (entry &&
+	    (entry->temperature != temperature ||
+	    entry->fileid != fileid)) {
+
+		if (temperature > entry->temperature)
+			entry = entry->right;
+		else if (temperature < entry->temperature)
+			entry = entry->left;
+		else if (fileid > entry->fileid)
+			entry = entry->right;
+		else
+			entry = entry->left;
+	}
+	return (entry);
+}
+#endif
+
+/*
+ * Insert a hot file entry into the tree.
+ *
+ * Entries are ordered by temperature (colder to the left, hotter to
+ * the right); entries with equal temperature are ordered by file id.
+ *
+ * Returns 0 after linking newentry into the tree and bumping
+ * hotdata->activefiles, or EEXIST (tree and counter untouched) when
+ * an entry with the same temperature and file id is met on the
+ * search path.  On EEXIST the caller still owns newentry.
+ *
+ * hotdata->coldest is only set here when the tree was empty.
+ */
+static int
+hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry)
+{
+	hotfile_entry_t *entry = hotdata->rootentry;
+	hotfile_entry_t **link;
+	u_int32_t fileid = newentry->fileid;
+	u_int32_t temperature = newentry->temperature;
+
+	/* First entry: it is both the root and the coldest. */
+	if (entry == NULL) {
+		hotdata->rootentry = newentry;
+		hotdata->coldest = newentry;
+		hotdata->activefiles++;
+		return 0;
+	}
+
+	for (;;) {
+		/*
+		 * Reject an exact duplicate wherever it is met, not only
+		 * when the matching node happens to have no left child.
+		 */
+		if (temperature == entry->temperature &&
+		    fileid == entry->fileid) {
+			return EEXIST;
+		}
+
+		/* Hotter, or same temperature with a larger id: go right. */
+		if (temperature > entry->temperature ||
+		    (temperature == entry->temperature &&
+		    fileid > entry->fileid)) {
+			link = &entry->right;
+		} else {
+			link = &entry->left;
+		}
+
+		/* Empty slot in the chosen direction: hang the entry here. */
+		if (*link == NULL) {
+			*link = newentry;
+			break;
+		}
+		entry = *link;
+	}
+
+	hotdata->activefiles++;
+	return 0;
+}
+
+/*
+ * Find the coldest entry in the tree.
+ *
+ * Follows left links from the root as far as they go.  Returns NULL
+ * when the tree is empty.
+ */
+static hotfile_entry_t *
+hf_coldest(hotfile_data_t *hotdata)
+{
+	hotfile_entry_t *node;
+
+	for (node = hotdata->rootentry;
+	    node != NULL && node->left != NULL;
+	    node = node->left) {
+		continue;
+	}
+	return (node);
+}
+
+/*
+ * Find the hottest entry in the tree.
+ *
+ * Follows right links from the root as far as they go.  Returns NULL
+ * when the tree is empty.
+ */
+static hotfile_entry_t *
+hf_hottest(hotfile_data_t *hotdata)
+{
+	hotfile_entry_t *node;
+
+	for (node = hotdata->rootentry;
+	    node != NULL && node->right != NULL;
+	    node = node->right) {
+		continue;
+	}
+	return (node);
+}
+
+/*
+ * Delete a hot file entry from the tree.
+ *
+ * Searches for the entry whose temperature AND file id both match,
+ * unlinks it, clears it and pushes it onto hotdata->freelist (the
+ * free list is chained through the ->right pointer).  Nothing happens
+ * when no such entry is found.
+ *
+ * If the removed entry was the cached hotdata->coldest (or nothing
+ * was cached), the cache is recomputed from the tree.
+ */
+static void
+hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature)
+{
+	hotfile_entry_t *entry, *parent, *next;
+
+	parent = NULL;
+	entry = hotdata->rootentry;
+
+	/*
+	 * Keep descending until both halves of the key match.  Stopping
+	 * as soon as only one of them matches would unlink an unrelated
+	 * entry that merely shares a temperature or a file id.
+	 */
+	while (entry &&
+	    (entry->temperature != temperature ||
+	    entry->fileid != fileid)) {
+
+		parent = entry;
+		if (temperature > entry->temperature)
+			entry = entry->right;
+		else if (temperature < entry->temperature)
+			entry = entry->left;
+		else if (fileid > entry->fileid)
+			entry = entry->right;
+		else
+			entry = entry->left;
+	}
+
+	if (entry) {
+		/*
+		 * Reorganize the sub-trees spanning from our entry.
+		 */
+		if ((next = entry->right)) {
+			hotfile_entry_t *pnextl, *psub;
+			/*
+			 * Tree pruning: take the left branch of the
+			 * current entry and place it at the lowest
+			 * left branch of the current right branch
+			 */
+			psub = next;
+
+			/* Walk the Right/Left sub tree from current entry */
+			while ((pnextl = psub->left))
+				psub = pnextl;
+
+			/* Plug the old left tree to the new ->Right leftmost entry */
+			psub->left = entry->left;
+
+		} else /* only left sub-tree, simple case */ {
+			next = entry->left;
+		}
+		/*
+		 * Now, plug the current entry sub tree to
+		 * the good pointer of our parent entry.
+		 */
+		if (parent == NULL)
+			hotdata->rootentry = next;
+		else if (parent->left == entry)
+			parent->left = next;
+		else
+			parent->right = next;
+
+		/* Place entry back on the free-list */
+		entry->left = 0;
+		entry->fileid = 0;
+		entry->temperature = 0;
+
+		entry->right = hotdata->freelist;
+		hotdata->freelist = entry;
+		hotdata->activefiles--;
+
+		/* The cached coldest entry may just have been removed. */
+		if (hotdata->coldest == entry || hotdata->coldest == NULL) {
+			hotdata->coldest = hf_coldest(hotdata);
+		}
+
+	}
+}
+
+/*
+ * Get a free hot file entry.
+ *
+ * Pops the head of hotdata->freelist (chained through ->right) and
+ * returns it with ->right cleared.  When the free list is empty, the
+ * coldest entry in the tree is deleted first so that it lands on the
+ * free list and can be reused.
+ *
+ * NOTE(review): the result of hf_coldest() is dereferenced without a
+ * NULL check, so this relies on the tree being non-empty whenever
+ * the free list is empty -- confirm against callers.
+ */
+static hotfile_entry_t *
+hf_getnewentry(hotfile_data_t *hotdata)
+{
+	hotfile_entry_t *fresh;
+
+	if (hotdata->freelist == NULL) {
+		/* Nothing spare: steal the coldest entry from the tree. */
+		hotfile_entry_t *victim = hf_coldest(hotdata);
+
+		hf_delete(hotdata, victim->fileid, victim->temperature);
+	}
+
+	fresh = hotdata->freelist;
+	hotdata->freelist = fresh->right;
+	fresh->right = 0;
+
+	return (fresh);
+}
+
+
+/*
+ * Generate a sorted list of hot files (hottest to coldest).
+ *
+ * Repeatedly takes the hottest entry left in the tree, copies its
+ * file id, temperature and block count into the next slot of
+ * sortedlist->hfl_hotfile[], adds its blocks to
+ * sortedlist->hfl_totalblocks, and then deletes it from the tree.
+ * The number of slots filled is stored in sortedlist->hfl_count.
+ *
+ * As a side effect, every node in the hot file tree will be
+ * deleted (moved to the free list).
+ */
+static void
+hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist)
+{
+	hotfile_entry_t *hottest;
+	int count;
+
+	for (count = 0; (hottest = hf_hottest(hotdata)) != NULL; count++) {
+		sortedlist->hfl_hotfile[count].hf_fileid = hottest->fileid;
+		sortedlist->hfl_hotfile[count].hf_temperature = hottest->temperature;
+		sortedlist->hfl_hotfile[count].hf_blocks = hottest->blocks;
+		sortedlist->hfl_totalblocks += hottest->blocks;
+
+		/* Done with this one; the next pass sees the next hottest. */
+		hf_delete(hotdata, hottest->fileid, hottest->temperature);
+	}
+
+	sortedlist->hfl_count = count;
+
+#if HFC_VERBOSE
+	printf("hfs: hf_getsortedlist returning %d entries w/%d total blocks\n", count, sortedlist->hfl_totalblocks);
+#endif
+}
+
+
+#if HFC_DEBUG
+/*
+ * Record the depth of the deepest node below (and including) root.
+ *
+ * depth is the number of levels above root; *maxdepth is raised
+ * whenever a node deeper than the current value is visited and is
+ * never lowered.
+ */
+static void
+hf_maxdepth(hotfile_entry_t * root, int depth, int *maxdepth)
+{
+	if (root == NULL)
+		return;
+
+	++depth;
+	if (*maxdepth < depth)
+		*maxdepth = depth;
+
+	hf_maxdepth(root->left, depth, maxdepth);
+	hf_maxdepth(root->right, depth, maxdepth);
+}
+
+/*
+ * Print every entry in the tree, left sub-tree first, then the node
+ * itself, then the right sub-tree.
+ */
+static void
+hf_printtree(hotfile_entry_t * root)
+{
+	if (root == NULL)
+		return;
+
+	hf_printtree(root->left);
+	printf("hfs: temperature: % 8d, fileid %d\n", root->temperature, root->fileid);
+	hf_printtree(root->right);
+}
+#endif