+
+/*
+ * Calculate the allocation zone for metadata.
+ *
+ * This zone includes the following:
+ * Allocation Bitmap file
+ * Overflow Extents file
+ * Journal file
+ * Quota files
+ * Clustered Hot files
+ * Catalog file
+ *
+ * METADATA ALLOCATION ZONE
+ * ____________________________________________________________________________
+ * | | | | | | |
+ * | BM | JF | OEF | CATALOG |---> | HOT FILES |
+ * |____|____|_____|_______________|______________________________|___________|
+ *
+ * <------------------------------- N * 128 MB ------------------------------->
+ *
+ */
+#define GIGABYTE (u_int64_t)(1024*1024*1024)
+
+#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
+#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
+#define JOURNAL_DEFAULT_SIZE (8*1024*1024)
+#define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
+#define HOTBAND_MINIMUM_SIZE (10*1024*1024)
+#define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
+
+/* Initialize the metadata zone.
+ *
+ * If the size of the volume is less than the minimum size for
+ * the metadata zone, the metadata zone is disabled.
+ *
+ * If disable is true, disable metadata zone unconditionally.
+ */
+void
+hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
+{
+ ExtendedVCB *vcb;
+ u_int64_t fs_size;
+ u_int64_t zonesize;
+ u_int64_t temp;
+ u_int64_t filesize;
+ u_int32_t blk;
+ int items, really_do_it=1;
+
+ vcb = HFSTOVCB(hfsmp);
+ fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
+
+ /*
+ * For volumes less than 10 GB, don't bother.
+ */
+ if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Skip non-journaled volumes as well.
+ */
+ if (hfsmp->jnl == NULL) {
+ really_do_it = 0;
+ }
+
+ /* If caller wants to disable metadata zone, do it */
+ if (disable == true) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Start with space for the boot blocks and Volume Header.
+ * 1536 = byte offset from start of volume to end of volume header:
+ * 1024 bytes is the offset from the start of the volume to the
+ * start of the volume header (defined by the volume format)
+ * + 512 bytes (the size of the volume header).
+ */
+ zonesize = roundup(1536, hfsmp->blockSize);
+
+ /*
+ * Add the on-disk size of allocation bitmap.
+ */
+ zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+
+ /*
+ * Add space for the Journal Info Block and Journal (if they're in
+ * this file system).
+ */
+ if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
+ zonesize += hfsmp->blockSize + hfsmp->jnl_size;
+ }
+
+ /*
+ * Add the existing size of the Extents Overflow B-tree.
+ * (It rarely grows, so don't bother reserving additional room for it.)
+ */
+ zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
+
+ /*
+ * If there is an Attributes B-tree, leave room for 11 clumps worth.
+ * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
+ * Installing a full OS onto a 20GB volume uses 7 to 8 clumps
+ * worth of space (depending on packages), which leaves another
+ * 3 or 4 clumps worth before we need another extent.
+ */
+ if (hfsmp->hfs_attribute_cp) {
+ zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
+ }
+
+ /*
+ * Leave room for 11 clumps of the Catalog B-tree.
+ * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
+ * Installing a full OS onto a 20GB volume uses 7 to 8 clumps
+ * worth of space (depending on packages), which leaves another
+ * 3 or 4 clumps worth before we need another extent.
+ */
+ zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
+
+ /*
+ * Add space for hot file region.
+ *
+ * ...for now, use 5 MB per 1 GB (0.5 %)
+ */
+ filesize = (fs_size / 1024) * 5;
+ if (filesize > HOTBAND_MAXIMUM_SIZE)
+ filesize = HOTBAND_MAXIMUM_SIZE;
+ else if (filesize < HOTBAND_MINIMUM_SIZE)
+ filesize = HOTBAND_MINIMUM_SIZE;
+ /*
+ * Calculate user quota file requirements.
+ */
+ if (hfsmp->hfs_flags & HFS_QUOTAS) {
+ items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_USERS)
+ items = QF_MIN_USERS;
+ else if (items > QF_MAX_USERS)
+ items = QF_MAX_USERS;
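+ /* If not already a power of two, round the item count up to the next power of two. */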
+ if (!powerof2(items)) {
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ /*
+ * Calculate group quota file requirements.
+ */
+ items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_GROUPS)
+ items = QF_MIN_GROUPS;
+ else if (items > QF_MAX_GROUPS)
+ items = QF_MAX_GROUPS;
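+ /* As above, round the item count up to the next power of two. */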
+ if (!powerof2(items)) {
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ }
+ zonesize += filesize;
+
+ /*
+ * Round up entire zone to a bitmap block's worth.
+ * The extra space goes to the catalog file and hot file area.
+ */
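+ /* One bitmap block of vcbVBMIOSize bytes describes vcbVBMIOSize * 8 allocation blocks. */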
+ temp = zonesize;
+ zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+ hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
+ /*
+ * If doing the round up for hfs_min_alloc_start would push us past
+ * allocLimit, then just reset it back to 0. Though using a value
+ * bigger than allocLimit would not cause damage in the block allocator
+ * code, this value could get stored in the volume header and make it out
+ * to disk, making the volume header technically corrupt.
+ */
+ if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
+ hfsmp->hfs_min_alloc_start = 0;
+ }
+
+ if (really_do_it == 0) {
+ /* If the metadata zone needs to be disabled (the volume is too
+ * small, is not journaled, or disabling was explicitly requested,
+ * e.g. because the volume was truncated), clear the flag and
+ * zero out the values that are no longer needed.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /* Disable metadata zone */
+ hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
+
+ /* Zero out mount point values that are not required */
+ hfsmp->hfs_catalog_maxblks = 0;
+ hfsmp->hfs_hotfile_maxblks = 0;
+ hfsmp->hfs_hotfile_start = 0;
+ hfsmp->hfs_hotfile_end = 0;
+ hfsmp->hfs_hotfile_freeblks = 0;
+ hfsmp->hfs_metazone_start = 0;
+ hfsmp->hfs_metazone_end = 0;
+ }
+
+ return;
+ }
+
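+ /*
+ * Distribute the extra space gained from rounding up: one third goes
+ * to the hot file area (filesize) and the remaining two thirds to the
+ * catalog file reserve.
+ */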
+ temp = zonesize - temp; /* temp has extra space */
+ filesize += temp / 3;
+ hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
+
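+ /*
+ * When hot file pinning is in use (HFS_CS_HOTFILE_PIN), the hot file
+ * area is sized from hfs_cs_hotfile_size rather than from the band
+ * size computed above.
+ */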
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
+ } else {
+ hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+ }
+
+ /* Convert to allocation blocks. */
+ blk = zonesize / vcb->blockSize;
+
+ /* The default metadata zone location is at the start of volume. */
+ hfsmp->hfs_metazone_start = 1;
+ hfsmp->hfs_metazone_end = blk - 1;
+
+ /* The default hotfile area is at the end of the zone; it is only set up on the root file system. */
+ if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
+ hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
+ hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
+ hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
+ }
+ else {
+ hfsmp->hfs_hotfile_start = 0;
+ hfsmp->hfs_hotfile_end = 0;
+ hfsmp->hfs_hotfile_freeblks = 0;
+ }
+#if DEBUG
+ printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+ printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+ printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
+#endif
+
+ hfsmp->hfs_flags |= HFS_METADATA_ZONE;
+}
+
+
+static u_int32_t
+hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
+{
+ ExtendedVCB *vcb = HFSTOVCB(hfsmp);
+ int lockflags;
+ int freeblocks;
+
+ if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
+ //
+ // This is only used at initialization time; on an SSD
+ // we'll get the real info from the hot file B-tree
+ // user info.
+ //
+ return 0;
+ }
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+ freeblocks = MetaZoneFreeBlocks(vcb);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ /* Minus Extents overflow file reserve. */
+ if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
+ freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
+ }
+
+ /* Minus catalog file reserve. */
+ if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
+ freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
+ }
+
+ if (freeblocks < 0)
+ freeblocks = 0;
+
+ // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
+ return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
+}
+
+/*
+ * Determine if a file is a "virtual" metadata file.
+ * This includes journal and quota files.
+ */
+int
+hfs_virtualmetafile(struct cnode *cp)
+{
+ const char * filename;
+
+
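+ /* All of the virtual metadata files live directly in the root directory. */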
+ if (cp->c_parentcnid != kHFSRootFolderID)
+ return (0);
+
+ filename = (const char *)cp->c_desc.cd_nameptr;
+ if (filename == NULL)
+ return (0);
+
+ if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
+ (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
+ (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
+ (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
+ (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
+ return (1);
+
+ return (0);
+}
+
+__private_extern__
+void hfs_syncer_lock(struct hfsmount *hfsmp)
+{
+ hfs_lock_mount(hfsmp);
+}
+
+__private_extern__
+void hfs_syncer_unlock(struct hfsmount *hfsmp)
+{
+ hfs_unlock_mount(hfsmp);
+}
+
+__private_extern__
+void hfs_syncer_wait(struct hfsmount *hfsmp)
+{
+ msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
+ "hfs_syncer_wait", NULL);
+}
+
+__private_extern__
+void hfs_syncer_wakeup(struct hfsmount *hfsmp)
+{
+ wakeup(&hfsmp->hfs_sync_incomplete);
+}
+
+__private_extern__
+uint64_t hfs_usecs_to_deadline(uint64_t usecs)
+{
+ uint64_t deadline;
+ clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
+ return deadline;
+}
+
+__private_extern__
+void hfs_syncer_queue(thread_call_t syncer)
+{
+ if (thread_call_enter_delayed_with_leeway(syncer,
+ NULL,
+ hfs_usecs_to_deadline(HFS_META_DELAY),
+ 0,
+ THREAD_CALL_DELAY_SYS_BACKGROUND)) {
+ printf("hfs: syncer already scheduled!\n");
+ }
+}
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+__private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+ // If we don't have a syncer or we get called by the syncer, just return
+ if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
+ return;
+
+ hfs_syncer_lock(hfsmp);
+
+ if (!timerisset(&hfsmp->hfs_sync_req_oldest))
+ microuptime(&hfsmp->hfs_sync_req_oldest);
+
+ /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
+ don't want to queue again if there is a sync outstanding. */
+ if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
+ hfs_syncer_unlock(hfsmp);
+ return;
+ }
+
+ hfsmp->hfs_sync_incomplete = TRUE;
+
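+ /* Capture the thread call while still holding the syncer lock; hfs_unmount may clear hfs_syncer once the lock is dropped. */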
+ thread_call_t syncer = hfsmp->hfs_syncer;
+
+ hfs_syncer_unlock(hfsmp);
+
+ hfs_syncer_queue(syncer);
+}
+
+int
+hfs_start_transaction(struct hfsmount *hfsmp)
+{
+ int ret = 0, unlock_on_err = 0;
+ thread_t thread = current_thread();
+
+#ifdef HFS_CHECK_LOCK_ORDER
+ /*
+ * You cannot start a transaction while holding a system
+ * file lock (unless the transaction is nested).
+ */
+ if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
+ if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
+ }
+ if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
+ }
+ if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
+ }
+ }
+#endif /* HFS_CHECK_LOCK_ORDER */
+
+again:
+
+ if (hfsmp->jnl) {
+ if (journal_owner(hfsmp->jnl) != thread) {
+ /*
+ * The global lock should be held shared if journal is
+ * active to prevent disabling. If we're not the owner
+ * of the journal lock, verify that we're not already
+ * holding the global lock exclusive before moving on.
+ */
+ if (hfsmp->hfs_global_lockowner == thread) {
+ ret = EBUSY;
+ goto out;
+ }
+
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+
+ // Things could have changed
+ if (!hfsmp->jnl) {
+ hfs_unlock_global(hfsmp);
+ goto again;
+ }
+
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+ unlock_on_err = 1;
+ }
+ } else {
+ // No journal
+ if (hfsmp->hfs_global_lockowner != thread) {
+ hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
+
+ // Things could have changed
+ if (hfsmp->jnl) {
+ hfs_unlock_global(hfsmp);
+ goto again;
+ }
+
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+ unlock_on_err = 1;
+ }
+ }
+
+ /* If a downgrade to read-only mount is in progress, no other
+ * thread than the downgrade thread is allowed to modify
+ * the file system.
+ */
+ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
+ hfsmp->hfs_downgrading_thread != thread) {
+ ret = EROFS;
+ goto out;
+ }
+
+ if (hfsmp->jnl) {
+ ret = journal_start_transaction(hfsmp->jnl);
+ } else {
+ ret = 0;
+ }
+
+ if (ret == 0)
+ ++hfsmp->hfs_transaction_nesting;
+
+out:
+ if (ret != 0 && unlock_on_err) {
+ hfs_unlock_global (hfsmp);
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ }
+
+ return ret;
+}
+
+int
+hfs_end_transaction(struct hfsmount *hfsmp)
+{
+ int ret;
+
+ assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
+ assert(hfsmp->hfs_transaction_nesting > 0);
+
+ if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
+ hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
+
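+ /* Only the outermost transaction drops the global lock and active-thread count below. */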
+ bool need_unlock = !--hfsmp->hfs_transaction_nesting;
+
+ if (hfsmp->jnl) {
+ ret = journal_end_transaction(hfsmp->jnl);
+ } else {
+ ret = 0;
+ }
+
+ if (need_unlock) {
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ hfs_unlock_global (hfsmp);
+ hfs_sync_ejectable(hfsmp);
+ }
+
+ return ret;
+}
+
+
+void
+hfs_journal_lock(struct hfsmount *hfsmp)
+{
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ if (hfsmp->jnl) {
+ journal_lock(hfsmp->jnl);
+ }
+ hfs_unlock_global (hfsmp);
+}
+
+void
+hfs_journal_unlock(struct hfsmount *hfsmp)
+{
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ if (hfsmp->jnl) {
+ journal_unlock(hfsmp->jnl);
+ }
+ hfs_unlock_global (hfsmp);
+}
+
+/*
+ * Flush the contents of the journal to the disk.
+ *
+ * - HFS_FLUSH_JOURNAL
+ *      Wait for the in-memory journal to be written to the disk
+ *      consistently. The journal may still contain uncommitted
+ *      transactions, and the file system metadata blocks in
+ *      those transactions might be written asynchronously
+ *      to the disk; there is no guarantee that they are
+ *      written to the disk before returning to the caller.
+ * Note that this option is sufficient for file system
+ * data integrity as it guarantees consistent journal
+ * content on the disk.
+ *
+ * - HFS_FLUSH_JOURNAL_META
+ *      Wait for the in-memory journal to be written to the disk
+ *      consistently, and also wait for all asynchronous
+ *      metadata blocks to be written to their corresponding
+ *      locations on the disk. This is overkill in normal
+ *      scenarios, but is useful whenever the metadata blocks
+ *      are required to be consistent on-disk rather than
+ *      just the journal being consistent, such as before live
+ *      verification and live volume resizing. The metadata
+ *      update does not include a barrier or track cache flush.
+ *
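+ * - HFS_FLUSH_JOURNAL_BARRIER
+ *      HFS_FLUSH_JOURNAL, plus a barrier to enforce write ordering
+ *      (falls back to HFS_FLUSH_FULL if the device does not support
+ *      barriers).
+ *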
+ * - HFS_FLUSH_FULL
+ * HFS_FLUSH_JOURNAL + force a track cache flush to media
+ *
+ * - HFS_FLUSH_CACHE
+ * Force a track cache flush to media.
+ *
+ * - HFS_FLUSH_BARRIER
+ * Barrier-only flush to ensure write order
+ *
+ */
+errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
+{
+ errno_t error = 0;
+ journal_flush_options_t options = 0;
+ dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
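+ /* Default to a barrier-only DKIOCSYNCHRONIZE request; the options are cleared below when a full cache flush is required. */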
+
+ switch (mode) {
+ case HFS_FLUSH_JOURNAL_META:
+ // wait for journal, metadata blocks and previous async flush to finish
+ SET(options, JOURNAL_WAIT_FOR_IO);
+
+ // no break
+
+ case HFS_FLUSH_JOURNAL:
+ case HFS_FLUSH_JOURNAL_BARRIER:
+ case HFS_FLUSH_FULL:
+
+ if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
+ !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+ mode = HFS_FLUSH_FULL;
+
+ if (mode == HFS_FLUSH_FULL)
+ SET(options, JOURNAL_FLUSH_FULL);
+
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+
+ if (hfsmp->jnl)
+ error = journal_flush(hfsmp->jnl, options);
+
+ hfs_unlock_global (hfsmp);
+
+ /*
+ * This may result in a double barrier as
+ * journal_flush may have issued a barrier itself
+ */
+ if (mode == HFS_FLUSH_JOURNAL_BARRIER)
+ error = VNOP_IOCTL(hfsmp->hfs_devvp,
+ DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+ FWRITE, vfs_context_kernel());
+
+ break;
+
+ case HFS_FLUSH_CACHE:
+ // Do a full sync
+ sync_req.options = 0;
+
+ // no break
+
+ case HFS_FLUSH_BARRIER:
+ // If a barrier-only flush is not supported, fall back to a full cache flush.
+ if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+ sync_req.options = 0;
+
+ error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+ FWRITE, vfs_context_kernel());
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ return error;
+}
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check whether a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>). If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair? We can't always be
+ * certain. If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs. Since newfs_hfs only created one
+ * clump, we can assume that if a Catalog B-tree is larger than its clump size,
+ * that means that the entire first clump must have been written to, which means
+ * there shouldn't be unused and unwritten nodes in that first clump, and this
+ * repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired. A newer newfs_hfs will set this bit.
+ * As will fsck_hfs when it repairs the unused nodes.
+ */
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+ int result;
+ struct filefork *catalog;
+ int lockflags;
+
+ if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+ {
+ /* This volume has already been checked and repaired. */
+ return 0;
+ }
+
+ if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
+ {
+ /* This volume is too old to have had the problem. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ catalog = hfsmp->hfs_catalog_cp->c_datafork;
+ if (catalog->ff_size > catalog->ff_clumpsize)
+ {
+ /* The entire first clump must have been in use at some point. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ /*
+ * If we get here, we need to zero out those unused nodes.
+ *
+ * We start a transaction and lock the catalog since we're going to be
+ * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
+ * do its writing via the journal, because that would be too much I/O
+ * to fit in a transaction, and it's a pain to break it up into multiple
+ * transactions. (It behaves more like growing a B-tree would.)
+ */
+ printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+ result = hfs_start_transaction(hfsmp);
+ if (result)
+ goto done;
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+ result = BTZeroUnusedNodes(catalog);
+ vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_end_transaction(hfsmp);
+ if (result == 0)
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
+
+done:
+ return result;
+}
+
+
+extern time_t snapshot_timestamp;
+
+int
+check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
+{
+ int snapshot_error = 0;
+
+ if (vp == NULL) {
+ return 0;
+ }
+
+ /* Swap files are special; skip them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+ // the change time is within this epoch
+ int error;
+
+ error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+ if (error == EDEADLK) {
+ snapshot_error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
+ } else if (error == EINTR) {
+ // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
+ snapshot_error = EINTR;
+ }
+ }
+ }
+
+ if (snapshot_error) return snapshot_error;
+
+ return 0;
+}
+
+int
+check_for_dataless_file(struct vnode *vp, uint64_t op_type)
+{
+ int error;
+
+ if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+ // there's nothing to do, it's not dataless
+ return 0;
+ }
+
+ /* Swap files are special; ignore them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
+ error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
+ if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
+ error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: dataless: timed out waiting for namespace handler...\n");
+ // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
+ return 0;
+ } else if (error == EINTR) {
+ // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
+ return EINTR;
+ }
+ } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
+ //
+ // if we're here, the dataless bit is still set on the file
+ // which means it didn't get handled. We return an error,
+ // but it's presently ignored by all callers of this function.
+ //
+ // XXXdbg - EDATANOTPRESENT is what we really need...
+ //
+ return EBADF;
+ }
+
+ return error;
+}
+
+
+//
+// NOTE: this function takes care of starting a transaction and
+// acquiring the systemfile lock so that it can call
+// cat_update().
+//
+// NOTE: do NOT hold and cnode locks while calling this function
+// to avoid deadlocks (because we take a lock on the root
+// cnode)
+//
+int
+hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
+{
+ struct vnode *rvp;
+ struct cnode *cp;
+ int error;
+
+ error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
+ if (error) {
+ return error;
+ }
+
+ cp = VTOC(rvp);
+ if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+ return error;
+ }
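+ /* The document ID counter lives in the root directory's extended Finder info (the second 16 bytes of the 32-byte Finder info field). */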
+ struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
+
+ int lockflags;
+ if ((error = hfs_start_transaction(hfsmp)) != 0) {
+ return error;
+ }
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+ if (extinfo->document_id == 0) {
+ // initialize this to start at 3 (one greater than the root-dir id)
+ extinfo->document_id = 3;
+ }
+
+ *docid = extinfo->document_id++;
+
+ // mark the root cnode dirty
+ cp->c_flag |= C_MODIFIED;
+ hfs_update(cp->c_vp, 0);
+
+ hfs_systemfile_unlock (hfsmp, lockflags);
+ (void) hfs_end_transaction(hfsmp);
+
+ (void) hfs_unlock(cp);
+
+ vnode_put(rvp);
+ rvp = NULL;
+
+ return 0;
+}
+
+
+/*
+ * Return information about number of file system allocation blocks
+ * taken by metadata on a volume.
+ *
+ * This function populates struct hfsinfo_metadata with allocation blocks
+ * used by extents overflow btree, catalog btree, bitmap, attribute btree,
+ * journal file, and sum of all of the above.
+ */
+int
+hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
+{
+ int lockflags = 0;
+ int ret_lockflags = 0;
+
+ /* Zero out the output buffer */
+ bzero(hinfo, sizeof(struct hfsinfo_metadata));
+
+ /*
+ * Getting number of allocation blocks for all btrees
+ * should be a quick operation, so we grab locks for
+ * all of them at the same time
+ */
+ lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
+ ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ /*
+ * Make sure that we were able to acquire all locks requested
+ * to protect us against conditions like unmount in progress.
+ */
+ if ((lockflags & ret_lockflags) != lockflags) {
+ /* Release any locks that were acquired */
+ hfs_systemfile_unlock(hfsmp, ret_lockflags);
+ return EPERM;
+ }
+
+ /* Get information about all the btrees */
+ hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
+ hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
+ hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
+ hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
+
+ /* Done with btrees, give up the locks */
+ hfs_systemfile_unlock(hfsmp, ret_lockflags);
+
+ /* Get information about journal file */
+ hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
+
+ /* Calculate total number of metadata blocks */
+ hinfo->total = hinfo->extents + hinfo->catalog +
+ hinfo->allocation + hinfo->attribute +
+ hinfo->journal;
+
+ return 0;
+}
+
+static int
+hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
+{
+ vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
+
+ return 0;
+}
+
+__private_extern__
+int hfs_freeze(struct hfsmount *hfsmp)
+{
+ // First make sure some other process isn't freezing
+ hfs_lock_mount(hfsmp);
+ while (hfsmp->hfs_freeze_state != HFS_THAWED) {
+ if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+ PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
+ hfs_unlock_mount(hfsmp);
+ return EINTR;
+ }
+ }
+
+ // Stop new syncers from starting
+ hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
+
+ // Now wait for all syncers to finish
+ while (hfsmp->hfs_syncers) {
+ if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+ PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
+ hfs_thaw_locked(hfsmp);
+ hfs_unlock_mount(hfsmp);
+ return EINTR;
+ }
+ }
+ hfs_unlock_mount(hfsmp);
+
+ // flush things before we get started to try and prevent
+ // dirty data from being paged out while we're frozen.
+ // note: we can't do this once we're in the freezing state because
+ // other threads will need to take the global lock
+ vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
+
+ // Block everything in hfs_lock_global now
+ hfs_lock_mount(hfsmp);
+ hfsmp->hfs_freeze_state = HFS_FREEZING;
+ hfsmp->hfs_freezing_thread = current_thread();
+ hfs_unlock_mount(hfsmp);
+
+ /* Take the exclusive lock to flush out anything else that
+ might have the global lock at the moment and also so we
+ can flush the journal. */
+ hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
+ journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
+ hfs_unlock_global(hfsmp);
+
+ // don't need to iterate on all vnodes, we just need to
+ // wait for writes to the system files and the device vnode
+ //
+ // Now that journal flush waits for all metadata blocks to
+ // be written out, waiting for btree writes is probably no
+ // longer required.
+ if (HFSTOVCB(hfsmp)->extentsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
+ if (HFSTOVCB(hfsmp)->catalogRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
+ if (HFSTOVCB(hfsmp)->allocationsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
+ if (hfsmp->hfs_attribute_vp)
+ vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
+ vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
+
+ // We're done, mark frozen
+ hfs_lock_mount(hfsmp);
+ hfsmp->hfs_freeze_state = HFS_FROZEN;
+ hfsmp->hfs_freezing_proc = current_proc();
+ hfs_unlock_mount(hfsmp);
+
+ return 0;
+}
+
+__private_extern__
+int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
+{
+ hfs_lock_mount(hfsmp);
+
+ if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
+ hfs_unlock_mount(hfsmp);
+ return EINVAL;
+ }
+ if (process && hfsmp->hfs_freezing_proc != process) {
+ hfs_unlock_mount(hfsmp);
+ return EPERM;
+ }
+
+ hfs_thaw_locked(hfsmp);
+
+ hfs_unlock_mount(hfsmp);
+
+ return 0;
+}
+
+static void hfs_thaw_locked(struct hfsmount *hfsmp)
+{
+ hfsmp->hfs_freezing_proc = NULL;
+ hfsmp->hfs_freeze_state = HFS_THAWED;
+
+ wakeup(&hfsmp->hfs_freeze_state);
+}