+
+/*
+ * Calculate the allocation zone for metadata.
+ *
+ * This zone includes the following:
+ * Allocation Bitmap file
+ * Overflow Extents file
+ * Journal file
+ * Quota files
+ * Clustered Hot files
+ * Catalog file
+ *
+ * METADATA ALLOCATION ZONE
+ * ____________________________________________________________________________
+ * | | | | | | |
+ * | BM | JF | OEF | CATALOG |---> | HOT FILES |
+ * |____|____|_____|_______________|______________________________|___________|
+ *
+ * <------------------------------- N * 128 MB ------------------------------->
+ *
+ */
+#define GIGABYTE (u_int64_t)(1024*1024*1024)
+
+#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
+#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
+#define JOURNAL_DEFAULT_SIZE (8*1024*1024)
+#define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
+#define HOTBAND_MINIMUM_SIZE (10*1024*1024)
+#define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
+
+void
+hfs_metadatazone_init(struct hfsmount *hfsmp)
+{
+ ExtendedVCB *vcb;
+ u_int64_t fs_size;
+ u_int64_t zonesize;
+ u_int64_t temp;
+ u_int64_t filesize;
+ u_int32_t blk;
+ int items, really_do_it=1;
+
+ vcb = HFSTOVCB(hfsmp);
+ fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
+
+ /*
+ * For volumes less than 10 GB, don't bother.
+ */
+ if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Skip non-journaled volumes as well.
+ */
+ if (hfsmp->jnl == NULL) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Start with space for the boot blocks and Volume Header.
+ * 1536 = byte offset from start of volume to end of volume header:
+ * 1024 bytes is the offset from the start of the volume to the
+ * start of the volume header (defined by the volume format)
+ * + 512 bytes (the size of the volume header).
+ */
+ zonesize = roundup(1536, hfsmp->blockSize);
+
+ /*
+ * Add the on-disk size of allocation bitmap.
+ */
+ zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+
+ /*
+ * Add space for the Journal Info Block and Journal (if they're in
+ * this file system).
+ */
+ if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
+ zonesize += hfsmp->blockSize + hfsmp->jnl_size;
+ }
+
+ /*
+ * Add the existing size of the Extents Overflow B-tree.
+ * (It rarely grows, so don't bother reserving additional room for it.)
+ */
+ zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+
+ /*
+ * If there is an Attributes B-tree, leave room for 11 clumps worth.
+ * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
+ * When installing a full OS install onto a 20GB volume, we use
+ * 7 to 8 clumps worth of space (depending on packages), so that leaves
+ * us with another 3 or 4 clumps worth before we need another extent.
+ */
+ if (hfsmp->hfs_attribute_cp) {
+ zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
+ }
+
+ /*
+ * Leave room for 11 clumps of the Catalog B-tree.
+ * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
+ * When installing a full OS install onto a 20GB volume, we use
+ * 7 to 8 clumps worth of space (depending on packages), so that leaves
+ * us with another 3 or 4 clumps worth before we need another extent.
+ */
+ zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
+
+ /*
+ * Add space for hot file region.
+ *
+ * ...for now, use 5 MB per 1 GB (0.5 %)
+ */
+ filesize = (fs_size / 1024) * 5;
+ if (filesize > HOTBAND_MAXIMUM_SIZE)
+ filesize = HOTBAND_MAXIMUM_SIZE;
+ else if (filesize < HOTBAND_MINIMUM_SIZE)
+ filesize = HOTBAND_MINIMUM_SIZE;
+ /*
+ * Calculate user quota file requirements.
+ */
+ if (hfsmp->hfs_flags & HFS_QUOTAS) {
+ items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_USERS)
+ items = QF_MIN_USERS;
+ else if (items > QF_MAX_USERS)
+ items = QF_MAX_USERS;
+ if (!powerof2(items)) {
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ /*
+ * Calculate group quota file requirements.
+ *
+ */
+ items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_GROUPS)
+ items = QF_MIN_GROUPS;
+ else if (items > QF_MAX_GROUPS)
+ items = QF_MAX_GROUPS;
+ if (!powerof2(items)) {
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ }
+ zonesize += filesize;
+
+ /*
+ * Round up entire zone to a bitmap block's worth.
+ * The extra space goes to the catalog file and hot file area.
+ */
+ temp = zonesize;
+ zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+ hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
+ /*
+ * If doing the round up for hfs_min_alloc_start would push us past
+ * allocLimit, then just reset it back to 0. Though using a value
+ * bigger than allocLimit would not cause damage in the block allocator
+ * code, this value could get stored in the volume header and make it out
+ * to disk, making the volume header technically corrupt.
+ */
+ if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
+ hfsmp->hfs_min_alloc_start = 0;
+ }
+
+ if (really_do_it == 0) {
+ /* If metadata zone needs to be disabled because the
+ * volume was truncated, clear the bit and zero out
+ * the values that are no longer needed.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /* Disable metadata zone */
+ hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
+
+ /* Zero out mount point values that are not required */
+ hfsmp->hfs_catalog_maxblks = 0;
+ hfsmp->hfs_hotfile_maxblks = 0;
+ hfsmp->hfs_hotfile_start = 0;
+ hfsmp->hfs_hotfile_end = 0;
+ hfsmp->hfs_hotfile_freeblks = 0;
+ hfsmp->hfs_metazone_start = 0;
+ hfsmp->hfs_metazone_end = 0;
+ }
+
+ return;
+ }
+
+ temp = zonesize - temp; /* temp has extra space */
+ filesize += temp / 3;
+ hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
+
+ hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+
+ /* Convert to allocation blocks. */
+ blk = zonesize / vcb->blockSize;
+
+ /* The default metadata zone location is at the start of volume. */
+ hfsmp->hfs_metazone_start = 1;
+ hfsmp->hfs_metazone_end = blk - 1;
+
+ /* The default hotfile area is at the end of the zone. */
+ hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
+ hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
+ hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
+#if 0
+ printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+ printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+ printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
+#endif
+ hfsmp->hfs_flags |= HFS_METADATA_ZONE;
+}
+
+
+static u_int32_t
+hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
+{
+ ExtendedVCB *vcb = HFSTOVCB(hfsmp);
+ int lockflags;
+ int freeblocks;
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+ freeblocks = MetaZoneFreeBlocks(vcb);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ /* Minus Extents overflow file reserve. */
+ freeblocks -=
+ hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
+ /* Minus catalog file reserve. */
+ freeblocks -=
+ hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
+ if (freeblocks < 0)
+ freeblocks = 0;
+
+ return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
+}
+
+/*
+ * Determine if a file is a "virtual" metadata file.
+ * This includes journal and quota files.
+ */
+__private_extern__
+int
+hfs_virtualmetafile(struct cnode *cp)
+{
+ const char * filename;
+
+
+ if (cp->c_parentcnid != kHFSRootFolderID)
+ return (0);
+
+ filename = (const char *)cp->c_desc.cd_nameptr;
+ if (filename == NULL)
+ return (0);
+
+ if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
+ (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
+ (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
+ (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
+ (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
+ return (1);
+
+ return (0);
+}
+
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+ __private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+ if (hfsmp->hfs_syncer) {
+ clock_sec_t secs;
+ clock_usec_t usecs;
+ uint64_t now;
+
+ clock_get_calendar_microtime(&secs, &usecs);
+ now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
+
+ if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
+ // if we have a sync scheduled but i/o is starting to pile up,
+ // don't call thread_call_enter_delayed() again because that
+ // will defer the sync.
+ return;
+ }
+
+ if (hfsmp->hfs_sync_scheduled == 0) {
+ uint64_t deadline;
+
+ hfsmp->hfs_last_sync_request_time = now;
+
+ clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
+
+ /*
+ * Increment hfs_sync_scheduled on the assumption that we're the
+ * first thread to schedule the timer. If some other thread beat
+ * us, then we'll decrement it. If we *were* the first to
+ * schedule the timer, then we need to keep track that the
+ * callback is waiting to complete.
+ */
+ OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+ if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
+ OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+ else
+ OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+ }
+ }
+}
+
+
+__private_extern__
+int
+hfs_start_transaction(struct hfsmount *hfsmp)
+{
+ int ret, unlock_on_err=0;
+ void * thread = current_thread();
+
+#ifdef HFS_CHECK_LOCK_ORDER
+ /*
+ * You cannot start a transaction while holding a system
+ * file lock. (unless the transaction is nested.)
+ */
+ if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
+ if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
+ }
+ if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
+ }
+ if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
+ }
+ }
+#endif /* HFS_CHECK_LOCK_ORDER */
+
+ if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
+ lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+ unlock_on_err = 1;
+ }
+
+ /* If a downgrade to read-only mount is in progress, no other
+ * process than the downgrade process is allowed to modify
+ * the file system.
+ */
+ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
+ (hfsmp->hfs_downgrading_proc != thread)) {
+ ret = EROFS;
+ goto out;
+ }
+
+ if (hfsmp->jnl) {
+ ret = journal_start_transaction(hfsmp->jnl);
+ if (ret == 0) {
+ OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+ }
+ } else {
+ ret = 0;
+ }
+
+out:
+ if (ret != 0 && unlock_on_err) {
+ lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ }
+
+ return ret;
+}
+
+__private_extern__
+int
+hfs_end_transaction(struct hfsmount *hfsmp)
+{
+ int need_unlock=0, ret;
+
+ if ( hfsmp->jnl == NULL
+ || ( journal_owner(hfsmp->jnl) == current_thread()
+ && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
+
+ need_unlock = 1;
+ }
+
+ if (hfsmp->jnl) {
+ ret = journal_end_transaction(hfsmp->jnl);
+ } else {
+ ret = 0;
+ }
+
+ if (need_unlock) {
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+ hfs_sync_ejectable(hfsmp);
+ }
+
+ return ret;
+}
+
+
+__private_extern__
+int
+hfs_journal_flush(struct hfsmount *hfsmp)
+{
+ int ret;
+
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+ if (hfsmp->jnl) {
+ ret = journal_flush(hfsmp->jnl);
+ } else {
+ ret = 0;
+ }
+ lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+
+ return ret;
+}
+
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check wheter a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>). If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair? We can't always be
+ * certain. If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs. Since newfs_hfs only created one
+ * clump, we can assume that if a Catalog B-tree is larger than its clump size,
+ * that means that the entire first clump must have been written to, which means
+ * there shouldn't be unused and unwritten nodes in that first clump, and this
+ * repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired. A newer newfs_hfs will set this bit.
+ * As will fsck_hfs when it repairs the unused nodes.
+ */
+__private_extern__
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+ int result;
+ struct filefork *catalog;
+ int lockflags;
+
+ if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+ {
+ /* This volume has already been checked and repaired. */
+ return 0;
+ }
+
+ if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
+ {
+ /* This volume is too old to have had the problem. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ catalog = hfsmp->hfs_catalog_cp->c_datafork;
+ if (catalog->ff_size > catalog->ff_clumpsize)
+ {
+ /* The entire first clump must have been in use at some point. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ /*
+ * If we get here, we need to zero out those unused nodes.
+ *
+ * We start a transaction and lock the catalog since we're going to be
+ * making on-disk changes. But note that BTZeroUnusedNodes doens't actually
+ * do its writing via the journal, because that would be too much I/O
+ * to fit in a transaction, and it's a pain to break it up into multiple
+ * transactions. (It behaves more like growing a B-tree would.)
+ */
+ printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+ result = hfs_start_transaction(hfsmp);
+ if (result)
+ goto done;
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+ result = BTZeroUnusedNodes(catalog);
+ vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_end_transaction(hfsmp);
+ if (result == 0)
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
+
+done:
+ return result;
+}