+
+//
+// This function re-locates the .journal_info_block and .journal
+// files in case they have moved (which can happen if you run
+// Norton SpeedDisk). If we fail to find either file we just
+// disable journaling for this volume and return. We turn off the
+// journaling bit in the vcb and assume it will get written to disk
+// later (if it doesn't, we'd just do the same thing again on the
+// next mount, which is harmless). If we disable journaling we
+// don't return an error, so the volume is still mountable.
+//
+// If the info we find for the .journal_info_block and .journal files
+// isn't what we had stored, we reset our cached info and proceed
+// with opening the journal normally.
+//
+static int
+hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
+{
+ JournalInfoBlock *jibp;
+ struct buf *jinfo_bp;
+ int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
+ int retval, write_jibp = 0, recreate_journal = 0;
+ struct vnode *devvp;
+ struct cat_attr jib_attr, jattr;
+ struct cat_fork jib_fork, jfork;
+ ExtendedVCB *vcb;
+ u_int32_t fid;
+ struct hfs_mount_args *args = _args;
+ u_int32_t jib_flags;
+ u_int64_t jib_offset;
+ u_int64_t jib_size;
+
+ devvp = hfsmp->hfs_devvp;
+ vcb = HFSTOVCB(hfsmp);
+
+ if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
+ if (args->journal_disable) {
+ return 0;
+ }
+
+ arg_flags = args->journal_flags;
+ arg_tbufsz = args->journal_tbuffer_size;
+ }
+
+ fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
+ if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
+ printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
+ jib_fork.cf_extents[0].startBlock);
+ vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
+ return 0;
+ }
+ hfsmp->hfs_jnlinfoblkid = fid;
+
+ // make sure the journal_info_block begins where we think it should.
+ if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
+ printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
+ SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
+
+ vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
+ vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
+ recreate_journal = 1;
+ }
+
+
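+ // Convert the journal info block's address from allocation blocks
+ // to logical (device) blocks; hfsPlusIOPosOffset accounts for an
+ // embedded volume and is 0 on a bare HFS+ volume.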
+ sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
+ jinfo_bp = NULL;
+ retval = (int)buf_meta_bread(devvp,
+ (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
+ ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
+ hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
+ if (retval) {
+ if (jinfo_bp) {
+ buf_brelse(jinfo_bp);
+ }
+ printf("hfs: can't read journal info block. disabling journaling.\n");
+ vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
+ return 0;
+ }
+
+ jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
+ jib_flags = SWAP_BE32(jibp->flags);
+ jib_offset = SWAP_BE64(jibp->offset);
+ jib_size = SWAP_BE64(jibp->size);
+
+ fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
+ if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
+ printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
+ jfork.cf_extents[0].startBlock);
+ buf_brelse(jinfo_bp);
+ vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
+ return 0;
+ }
+ hfsmp->hfs_jnlfileid = fid;
+
+ // make sure the journal file begins where we think it should.
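+ // (jib_offset is in bytes from the start of the volume, while
+ // cf_extents[] records allocation blocks, hence the division.)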
+ if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
+ printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
+ (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
+
+ jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
+ write_jibp = 1;
+ recreate_journal = 1;
+ }
+
+ // check the size of the journal file.
+ if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
+ printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
+ jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
+
+ jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
+ write_jibp = 1;
+ recreate_journal = 1;
+ }
+
+ if (jib_flags & kJIJournalInFSMask) {
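+ // The journal lives inside this file system: journal I/O goes
+ // through the volume's own device, and the offset becomes
+ // device-relative once the embedded-volume offset is added.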
+ hfsmp->jvp = hfsmp->hfs_devvp;
+ jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
+ } else {
+ const char *dev_name;
+ int need_init = 0;
+
+ dev_name = vnode_getname_printable(devvp);
+
+ // since the journal is empty, just use any available external journal
+ *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
+
+ // this fills in the uuid of the device we actually get
+ hfsmp->jvp = open_journal_dev(dev_name,
+ !(jib_flags & kJIJournalNeedInitMask),
+ (char *)&jibp->ext_jnl_uuid[0],
+ (char *)&jibp->machine_serial_num[0],
+ jib_size,
+ hfsmp->hfs_logical_block_size,
+ &need_init);
+ if (hfsmp->jvp == NULL) {
+ buf_brelse(jinfo_bp);
+ vnode_putname_printable(dev_name);
+ return EROFS;
+ } else {
+ if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
+ strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
+ }
+ }
+ jib_offset = 0;
+ recreate_journal = 1;
+ write_jibp = 1;
+ if (need_init) {
+ jib_flags |= kJIJournalNeedInitMask;
+ }
+ vnode_putname_printable(dev_name);
+ }
+
+ // save this off for the hack-y check in hfs_remove()
+ hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
+ hfsmp->jnl_size = jib_size;
+
+ if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
+ // if the file system is read-only, check if the journal is empty.
+ // if it is, then we can allow the mount. otherwise we have to
+ // return failure.
+ retval = journal_is_clean(hfsmp->jvp,
+ jib_offset,
+ jib_size,
+ devvp,
+ hfsmp->hfs_logical_block_size);
+
+ hfsmp->jnl = NULL;
+
+ buf_brelse(jinfo_bp);
+
+ if (retval) {
+ const char *name = vnode_getname_printable(devvp);
+ printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
+ name);
+ vnode_putname_printable(name);
+ }
+
+ return retval;
+ }
+
+ if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
+ printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
+ jib_offset, jib_size);
+ hfsmp->jnl = journal_create(hfsmp->jvp,
+ jib_offset,
+ jib_size,
+ devvp,
+ hfsmp->hfs_logical_block_size,
+ arg_flags,
+ arg_tbufsz,
+ hfs_sync_metadata, hfsmp->hfs_mp,
+ hfsmp->hfs_mp);
+ if (hfsmp->jnl)
+ journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
+
+ // no need to start a transaction here... if this were to fail
+ // we'd just re-init it on the next mount.
+ jib_flags &= ~kJIJournalNeedInitMask;
+ write_jibp = 1;
+
+ } else {
+ //
+ // if we weren't the last person to mount this volume
+ // then we need to throw away the journal because it
+ // is likely that someone else mucked with the disk.
+ // if the journal is empty this is no big deal. if the
+ // disk is dirty this prevents us from replaying the
+ // journal over top of changes that someone else made.
+ //
+ arg_flags |= JOURNAL_RESET;
+
+ //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
+ // jib_offset,
+ // jib_size, SWAP_BE32(vhp->blockSize));
+
+ hfsmp->jnl = journal_open(hfsmp->jvp,
+ jib_offset,
+ jib_size,
+ devvp,
+ hfsmp->hfs_logical_block_size,
+ arg_flags,
+ arg_tbufsz,
+ hfs_sync_metadata, hfsmp->hfs_mp,
+ hfsmp->hfs_mp);
+ if (hfsmp->jnl)
+ journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
+ }
+
+
+ if (write_jibp) {
+ jibp->flags = SWAP_BE32(jib_flags);
+ jibp->offset = SWAP_BE64(jib_offset);
+ jibp->size = SWAP_BE64(jib_size);
+
+ buf_bwrite(jinfo_bp);
+ } else {
+ buf_brelse(jinfo_bp);
+ }
+ jinfo_bp = NULL;
+ jibp = NULL;
+
+ // if we expected the journal to be there and we couldn't
+ // create it or open it then we have to bail out.
+ if (hfsmp->jnl == NULL) {
+ printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Calculate the allocation zone for metadata.
+ *
+ * This zone includes the following:
+ * Allocation Bitmap file
+ * Overflow Extents file
+ * Journal file
+ * Quota files
+ * Clustered Hot files
+ * Catalog file
+ *
+ * METADATA ALLOCATION ZONE
+ * ____________________________________________________________________________
+ * | | | | | | |
+ * | BM | JF | OEF | CATALOG |---> | HOT FILES |
+ * |____|____|_____|_______________|______________________________|___________|
+ *
+ * <------------------------------- N * 128 MB ------------------------------->
+ *
+ */
+#define GIGABYTE (u_int64_t)(1024*1024*1024)
+
+#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
+#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
+#define JOURNAL_DEFAULT_SIZE (8*1024*1024)
+#define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
+#define HOTBAND_MINIMUM_SIZE (10*1024*1024)
+#define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
+
+/* Initialize the metadata zone.
+ *
+ * If the size of the volume is less than the minimum size for the
+ * metadata zone, the metadata zone is disabled.
+ *
+ * If disable is true, disable metadata zone unconditionally.
+ */
+void
+hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
+{
+ ExtendedVCB *vcb;
+ u_int64_t fs_size;
+ u_int64_t zonesize;
+ u_int64_t temp;
+ u_int64_t filesize;
+ u_int32_t blk;
+ int items, really_do_it=1;
+
+ vcb = HFSTOVCB(hfsmp);
+ fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
+
+ /*
+ * For volumes less than 10 GB, don't bother.
+ */
+ if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Skip non-journaled volumes as well.
+ */
+ if (hfsmp->jnl == NULL) {
+ really_do_it = 0;
+ }
+
+ /* If caller wants to disable metadata zone, do it */
+ if (disable == true) {
+ really_do_it = 0;
+ }
+
+ /*
+ * Start with space for the boot blocks and Volume Header.
+ * 1536 = byte offset from start of volume to end of volume header:
+ * 1024 bytes is the offset from the start of the volume to the
+ * start of the volume header (defined by the volume format)
+ * + 512 bytes (the size of the volume header).
+ */
+ zonesize = roundup(1536, hfsmp->blockSize);
+
+ /*
+ * Add the on-disk size of allocation bitmap.
+ */
+ zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+
+ /*
+ * Add space for the Journal Info Block and Journal (if they're in
+ * this file system).
+ */
+ if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
+ zonesize += hfsmp->blockSize + hfsmp->jnl_size;
+ }
+
+ /*
+ * Add the existing size of the Extents Overflow B-tree.
+ * (It rarely grows, so don't bother reserving additional room for it.)
+ */
+ zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+
+ /*
+ * If there is an Attributes B-tree, leave room for 11 clumps worth.
+ * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
+ * When installing a full OS install onto a 20GB volume, we use
+ * 7 to 8 clumps worth of space (depending on packages), so that leaves
+ * us with another 3 or 4 clumps worth before we need another extent.
+ */
+ if (hfsmp->hfs_attribute_cp) {
+ zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
+ }
+
+ /*
+ * Leave room for 11 clumps of the Catalog B-tree.
+ * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
+ * When installing a full OS install onto a 20GB volume, we use
+ * 7 to 8 clumps worth of space (depending on packages), so that leaves
+ * us with another 3 or 4 clumps worth before we need another extent.
+ */
+ zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
+
+ /*
+ * Add space for hot file region.
+ *
+ * ...for now, use 5 MB per 1 GB (0.5 %)
+ */
+ filesize = (fs_size / 1024) * 5;
+ if (filesize > HOTBAND_MAXIMUM_SIZE)
+ filesize = HOTBAND_MAXIMUM_SIZE;
+ else if (filesize < HOTBAND_MINIMUM_SIZE)
+ filesize = HOTBAND_MINIMUM_SIZE;
+ /*
+ * Calculate user quota file requirements.
+ */
+ if (hfsmp->hfs_flags & HFS_QUOTAS) {
+ items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_USERS)
+ items = QF_MIN_USERS;
+ else if (items > QF_MAX_USERS)
+ items = QF_MAX_USERS;
+ if (!powerof2(items)) {
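+ /* Round items up to the next power of two. */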
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ /*
+ * Calculate group quota file requirements.
+ */
+ items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
+ if (items < QF_MIN_GROUPS)
+ items = QF_MIN_GROUPS;
+ else if (items > QF_MAX_GROUPS)
+ items = QF_MAX_GROUPS;
+ if (!powerof2(items)) {
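+ /* Round items up to the next power of two. */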
+ int x = items;
+ items = 4;
+ while (x>>1 != 1) {
+ x = x >> 1;
+ items = items << 1;
+ }
+ }
+ filesize += (items + 1) * sizeof(struct dqblk);
+ }
+ zonesize += filesize;
+
+ /*
+ * Round up entire zone to a bitmap block's worth.
+ * The extra space goes to the catalog file and hot file area.
+ */
+ temp = zonesize;
+ zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+ hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
+ /*
+ * If doing the round up for hfs_min_alloc_start would push us past
+ * allocLimit, then just reset it back to 0. Though using a value
+ * bigger than allocLimit would not cause damage in the block allocator
+ * code, this value could get stored in the volume header and make it out
+ * to disk, making the volume header technically corrupt.
+ */
+ if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
+ hfsmp->hfs_min_alloc_start = 0;
+ }
+
+ if (really_do_it == 0) {
+ /* If the metadata zone was previously enabled but now needs
+ * to be disabled (e.g. the volume was shrunk below the minimum
+ * size, or is no longer journaled), clear the flag and zero out
+ * the values that are no longer needed.
+ */
+ if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+ /* Disable metadata zone */
+ hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
+
+ /* Zero out mount point values that are not required */
+ hfsmp->hfs_catalog_maxblks = 0;
+ hfsmp->hfs_hotfile_maxblks = 0;
+ hfsmp->hfs_hotfile_start = 0;
+ hfsmp->hfs_hotfile_end = 0;
+ hfsmp->hfs_hotfile_freeblks = 0;
+ hfsmp->hfs_metazone_start = 0;
+ hfsmp->hfs_metazone_end = 0;
+ }
+
+ return;
+ }
+
+ temp = zonesize - temp; /* temp has extra space */
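+ /* A third of the slack goes to the hot file band; the rest goes to the catalog. */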
+ filesize += temp / 3;
+ hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
+
+ hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+
+ /* Convert to allocation blocks. */
+ blk = zonesize / vcb->blockSize;
+
+ /* The default metadata zone location is at the start of volume. */
+ hfsmp->hfs_metazone_start = 1;
+ hfsmp->hfs_metazone_end = blk - 1;
+
+ /* The default hotfile area is at the end of the zone. */
+ if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
+ hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
+ hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
+ hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
+ }
+ else {
+ hfsmp->hfs_hotfile_start = 0;
+ hfsmp->hfs_hotfile_end = 0;
+ hfsmp->hfs_hotfile_freeblks = 0;
+ }
+#if 0
+ printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+ printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+ printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
+#endif
+ hfsmp->hfs_flags |= HFS_METADATA_ZONE;
+}
+
+
+static u_int32_t
+hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
+{
+ ExtendedVCB *vcb = HFSTOVCB(hfsmp);
+ int lockflags;
+ int freeblocks;
+
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+ freeblocks = MetaZoneFreeBlocks(vcb);
+ hfs_systemfile_unlock(hfsmp, lockflags);
+
+ /* Minus Extents overflow file reserve. */
+ freeblocks -=
+ hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
+ /* Minus catalog file reserve. */
+ freeblocks -=
+ hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
+ if (freeblocks < 0)
+ freeblocks = 0;
+
+ return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
+}
+
+/*
+ * Determine if a file is a "virtual" metadata file.
+ * This includes journal and quota files.
+ */
+int
+hfs_virtualmetafile(struct cnode *cp)
+{
+ const char * filename;
+
+
+ if (cp->c_parentcnid != kHFSRootFolderID)
+ return (0);
+
+ filename = (const char *)cp->c_desc.cd_nameptr;
+ if (filename == NULL)
+ return (0);
+
+ if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
+ (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
+ (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
+ (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
+ (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
+ return (1);
+
+ return (0);
+}
+
+__private_extern__
+void hfs_syncer_lock(struct hfsmount *hfsmp)
+{
+ hfs_lock_mount(hfsmp);
+}
+
+__private_extern__
+void hfs_syncer_unlock(struct hfsmount *hfsmp)
+{
+ hfs_unlock_mount(hfsmp);
+}
+
+__private_extern__
+void hfs_syncer_wait(struct hfsmount *hfsmp)
+{
+ msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
+ "hfs_syncer_wait", NULL);
+}
+
+__private_extern__
+void hfs_syncer_wakeup(struct hfsmount *hfsmp)
+{
+ wakeup(&hfsmp->hfs_sync_incomplete);
+}
+
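+// Convert a relative interval in microseconds into the absolute
+// deadline form that the thread_call APIs expect (e.g. "HFS_META_DELAY
+// microseconds from now" in hfs_syncer_queue below).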
+__private_extern__
+uint64_t hfs_usecs_to_deadline(uint64_t usecs)
+{
+ uint64_t deadline;
+ clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
+ return deadline;
+}
+
+__private_extern__
+void hfs_syncer_queue(thread_call_t syncer)
+{
+ if (thread_call_enter_delayed_with_leeway(syncer,
+ NULL,
+ hfs_usecs_to_deadline(HFS_META_DELAY),
+ 0,
+ THREAD_CALL_DELAY_SYS_BACKGROUND)) {
+ printf ("hfs: syncer already scheduled!");
+ }
+}
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+__private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+ // If we don't have a syncer or we get called by the syncer, just return
+ if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
+ return;
+
+ hfs_syncer_lock(hfsmp);
+
+ if (!timerisset(&hfsmp->hfs_sync_req_oldest))
+ microuptime(&hfsmp->hfs_sync_req_oldest);
+
+ /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
+ don't want to queue again if there is a sync outstanding. */
+ if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
+ hfs_syncer_unlock(hfsmp);
+ return;
+ }
+
+ hfsmp->hfs_sync_incomplete = TRUE;
+
+ thread_call_t syncer = hfsmp->hfs_syncer;
+
+ hfs_syncer_unlock(hfsmp);
+
+ hfs_syncer_queue(syncer);
+}
+
+int
+hfs_start_transaction(struct hfsmount *hfsmp)
+{
+ int ret, unlock_on_err=0;
+ void * thread = current_thread();
+
+#ifdef HFS_CHECK_LOCK_ORDER
+ /*
+ * You cannot start a transaction while holding a system
+ * file lock. (unless the transaction is nested.)
+ */
+ if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
+ if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
+ }
+ if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
+ }
+ if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
+ panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
+ }
+ }
+#endif /* HFS_CHECK_LOCK_ORDER */
+
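+ // Unless this thread already owns the journal (i.e. this is a
+ // nested transaction), take the global lock shared and count
+ // ourselves among the active threads.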
+ if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+ unlock_on_err = 1;
+ }
+
+ /* If a downgrade to read-only mount is in progress, no other
+ * process than the downgrade process is allowed to modify
+ * the file system.
+ */
+ if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
+ (hfsmp->hfs_downgrading_proc != thread)) {
+ ret = EROFS;
+ goto out;
+ }
+
+ if (hfsmp->jnl) {
+ ret = journal_start_transaction(hfsmp->jnl);
+ if (ret == 0) {
+ OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+ }
+ } else {
+ ret = 0;
+ }
+
+out:
+ if (ret != 0 && unlock_on_err) {
+ hfs_unlock_global (hfsmp);
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ }
+
+ return ret;
+}
+
+int
+hfs_end_transaction(struct hfsmount *hfsmp)
+{
+ int need_unlock=0, ret;
+
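+ // OSAddAtomic returns the value prior to the decrement, so a result
+ // of 1 means this call just ended the outermost transaction.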
+ if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
+ && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
+ need_unlock = 1;
+ }
+
+ if (hfsmp->jnl) {
+ ret = journal_end_transaction(hfsmp->jnl);
+ } else {
+ ret = 0;
+ }
+
+ if (need_unlock) {
+ OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+ hfs_unlock_global (hfsmp);
+ hfs_sync_ejectable(hfsmp);
+ }
+
+ return ret;
+}
+
+
+/*
+ * Flush the contents of the journal to the disk.
+ *
+ * Input:
+ * wait_for_IO -
+ * If TRUE, wait for the in-memory journal to be written
+ * out to disk consistently, and also wait for all
+ * asynchronous metadata blocks to be written to their
+ * corresponding locations on disk. This means that the
+ * journal is empty at this point and does not contain
+ * any transactions. This is overkill in normal scenarios
+ * but is useful whenever the metadata blocks are required
+ * to be consistent on-disk instead of just the journal
+ * being consistent, such as before live verification
+ * and live volume resizing.
+ *
+ * If FALSE, only wait for the in-memory journal to be
+ * written out to disk consistently. This means that the
+ * journal still contains uncommitted transactions and the
+ * file system metadata blocks in those transactions might
+ * be written asynchronously to the disk, with no guarantee
+ * that they reach the disk before we return to the caller.
+ * Note that this option is sufficient for file system data
+ * integrity as it guarantees consistent journal content
+ * on the disk.
+ */
+int
+hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
+{
+ int ret;
+
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ if (hfsmp->jnl) {
+ ret = journal_flush(hfsmp->jnl, wait_for_IO);
+ } else {
+ ret = 0;
+ }
+ hfs_unlock_global (hfsmp);
+
+ return ret;
+}
+
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check whether a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>). If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair? We can't always be
+ * certain. If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs. Since newfs_hfs only created one
+ * clump, a Catalog B-tree larger than its clump size means the entire first
+ * clump must have been written to; in that case there shouldn't be unused
+ * and unwritten nodes in that first clump, and this repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired. A newer newfs_hfs will set this bit.
+ * As will fsck_hfs when it repairs the unused nodes.
+ */
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+ int result;
+ struct filefork *catalog;
+ int lockflags;
+
+ if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+ {
+ /* This volume has already been checked and repaired. */
+ return 0;
+ }
+
+ if (hfsmp->localCreateDate < kHFSUnusedNodesFixDate)
+ {
+ /* This volume is too old to have had the problem. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ catalog = hfsmp->hfs_catalog_cp->c_datafork;
+ if (catalog->ff_size > catalog->ff_clumpsize)
+ {
+ /* The entire first clump must have been in use at some point. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ /*
+ * If we get here, we need to zero out those unused nodes.
+ *
+ * We start a transaction and lock the catalog since we're going to be
+ * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
+ * do its writing via the journal, because that would be too much I/O
+ * to fit in a transaction, and it's a pain to break it up into multiple
+ * transactions. (It behaves more like growing a B-tree would.)
+ */
+ printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+ result = hfs_start_transaction(hfsmp);
+ if (result)
+ goto done;
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+ result = BTZeroUnusedNodes(catalog);
+ vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_end_transaction(hfsmp);
+ if (result == 0)
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
+
+done:
+ return result;
+}
+
+
+extern time_t snapshot_timestamp;
+
+int
+check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
+{
+ int tracked_error = 0, snapshot_error = 0;
+
+ if (vp == NULL) {
+ return 0;
+ }
+
+ /* Swap files are special; skip them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
+ // the file has the tracked bit set, so send an event to the tracked-file handler
+ int error;
+
+ // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
+ error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
+ if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
+
+ } else if (error == EINTR) {
+ // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
+ tracked_error = EINTR;
+ }
+ }
+ }
+
+ if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+ // the change time is within this epoch
+ int error;
+
+ error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+ if (error == EDEADLK) {
+ snapshot_error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
+ } else if (error == EINTR) {
+ // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
+ snapshot_error = EINTR;
+ }
+ }
+ }
+
+ if (tracked_error) return tracked_error;
+ if (snapshot_error) return snapshot_error;
+
+ return 0;
+}
+
+int
+check_for_dataless_file(struct vnode *vp, uint64_t op_type)
+{
+ int error;
+
+ if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+ // there's nothing to do, it's not dataless
+ return 0;
+ }
+
+ /* Swap files are special; ignore them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
+ error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
+ if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
+ error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: dataless: timed out waiting for namespace handler...\n");
+ // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
+ return 0;
+ } else if (error == EINTR) {
+ // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
+ return EINTR;
+ }
+ } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
+ //
+ // if we're here, the dataless bit is still set on the file
+ // which means it didn't get handled. we return an error
+ // but it's presently ignored by all callers of this function.
+ //
+ // XXXdbg - EDATANOTPRESENT is what we really need...
+ //
+ return EBADF;
+ }
+
+ return error;
+}
+
+
+//
+// NOTE: this function takes care of starting a transaction and
+// acquiring the systemfile lock so that it can call
+// cat_update().
+//
+// NOTE: do NOT hold any cnode locks while calling this function
+// to avoid deadlocks (because we take a lock on the root
+// cnode)
+//
+int
+hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
+{
+ struct vnode *rvp;
+ struct cnode *cp;
+ int error;
+
+ error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
+ if (error) {
+ return error;
+ }
+
+ cp = VTOC(rvp);
+ if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+ vnode_put(rvp);
+ return error;
+ }
+ struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
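+ // The document-id counter lives in the extended Finder info, i.e.
+ // the second 16 bytes of the root directory's 32-byte Finder info.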
+
+ int lockflags;
+ if ((error = hfs_start_transaction(hfsmp)) != 0) {
+ hfs_unlock(cp);
+ vnode_put(rvp);
+ return error;
+ }
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+ if (extinfo->document_id == 0) {
+ // initialize this to start at 3 (one greater than the root-dir id)
+ extinfo->document_id = 3;
+ }
+
+ *docid = extinfo->document_id++;
+
+ // mark the root cnode dirty
+ cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
+ (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
+
+ hfs_systemfile_unlock (hfsmp, lockflags);
+ (void) hfs_end_transaction(hfsmp);
+
+ (void) hfs_unlock(cp);
+
+ vnode_put(rvp);
+ rvp = NULL;
+
+ return 0;
+}