+
+
+void
+hfs_journal_lock(struct hfsmount *hfsmp)
+{
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ if (hfsmp->jnl) {
+ journal_lock(hfsmp->jnl);
+ }
+ hfs_unlock_global (hfsmp);
+}
+
+void
+hfs_journal_unlock(struct hfsmount *hfsmp)
+{
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+ if (hfsmp->jnl) {
+ journal_unlock(hfsmp->jnl);
+ }
+ hfs_unlock_global (hfsmp);
+}
+
+/*
+ * Flush the contents of the journal to the disk.
+ *
+ * - HFS_FLUSH_JOURNAL
+ * Wait to write in-memory journal to the disk consistently.
+ * The journal may still contain transactions that have not
+ * been committed to their final on-disk locations; the file
+ * system metadata blocks belonging to those transactions may
+ * be written asynchronously to the disk, and there is no
+ * guarantee that they reach the disk before returning to
+ * the caller.
+ * Note that this option is sufficient for file system
+ * data integrity as it guarantees consistent journal
+ * content on the disk.
+ *
+ * - HFS_FLUSH_JOURNAL_META
+ * Wait to write in-memory journal to the disk
+ * consistently, and also wait for all asynchronous
+ * metadata blocks to be written to their corresponding
+ * locations on the disk. This is overkill in normal
+ * scenarios but is useful whenever the metadata blocks
+ * are required to be consistent on-disk instead of
+ * just the journal being consistent; for example, before
+ * live verification and live volume resizing. The metadata
+ * update does not include a barrier or track cache flush.
+ *
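+ * - HFS_FLUSH_JOURNAL_BARRIER
+ * HFS_FLUSH_JOURNAL + issue a barrier to the device if
+ * barriers are supported; if the device does not support
+ * barriers, this behaves like HFS_FLUSH_FULL.
+ *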
+ * - HFS_FLUSH_FULL
+ * HFS_FLUSH_JOURNAL + force a track cache flush to media
+ *
+ * - HFS_FLUSH_CACHE
+ * Force a track cache flush to media.
+ *
+ * - HFS_FLUSH_BARRIER
+ * Barrier-only flush to ensure write order
+ *
+ */
+errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
+{
+ errno_t error = 0;
+ journal_flush_options_t options = 0;
+ dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
+
+ switch (mode) {
+ case HFS_FLUSH_JOURNAL_META:
+ // wait for journal, metadata blocks and previous async flush to finish
+ SET(options, JOURNAL_WAIT_FOR_IO);
+
+ // no break
+
+ case HFS_FLUSH_JOURNAL:
+ case HFS_FLUSH_JOURNAL_BARRIER:
+ case HFS_FLUSH_FULL:
+
+ if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
+ !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+ mode = HFS_FLUSH_FULL;
+
+ if (mode == HFS_FLUSH_FULL)
+ SET(options, JOURNAL_FLUSH_FULL);
+
+ /* Only peek at hfsmp->jnl while holding the global lock */
+ hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+
+ if (hfsmp->jnl)
+ error = journal_flush(hfsmp->jnl, options);
+
+ hfs_unlock_global (hfsmp);
+
+ /*
+ * This may result in a double barrier as
+ * journal_flush may have issued a barrier itself
+ */
+ if (mode == HFS_FLUSH_JOURNAL_BARRIER)
+ error = VNOP_IOCTL(hfsmp->hfs_devvp,
+ DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+ FWRITE, vfs_context_kernel());
+
+ break;
+
+ case HFS_FLUSH_CACHE:
+ // Do a full sync
+ sync_req.options = 0;
+
+ // no break
+
+ case HFS_FLUSH_BARRIER:
+ // If a barrier-only flush is not supported, fall back to a full flush.
+ if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
+ sync_req.options = 0;
+
+ error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
+ FWRITE, vfs_context_kernel());
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ return error;
+}
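+
+/*
+ * Illustrative usage sketch (not part of the original change): a caller
+ * that has just finished a metadata transaction and wants the journal
+ * stable on disk, with an ordering barrier when the device supports one,
+ * might do something like:
+ *
+ *     errno_t err = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_BARRIER);
+ *     if (err)
+ *         printf("hfs: journal flush failed (%d)\n", err);
+ *
+ * As implemented above, the barrier silently degrades to a full track
+ * cache flush on devices without barrier support.
+ */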
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check whether a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>). If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair? We can't always be
+ * certain. If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs. Since newfs_hfs only created one
+ * clump, we can assume that if a Catalog B-tree is larger than its clump size,
+ * that means that the entire first clump must have been written to, which means
+ * there shouldn't be unused and unwritten nodes in that first clump, and this
+ * repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired. A newer newfs_hfs will set this bit.
+ * As will fsck_hfs when it repairs the unused nodes.
+ */
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+ int result;
+ struct filefork *catalog;
+ int lockflags;
+
+ if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+ {
+ /* This volume has already been checked and repaired. */
+ return 0;
+ }
+
+ if (hfsmp->localCreateDate < kHFSUnusedNodesFixDate)
+ {
+ /* This volume is too old to have had the problem. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ catalog = hfsmp->hfs_catalog_cp->c_datafork;
+ if (catalog->ff_size > catalog->ff_clumpsize)
+ {
+ /* The entire first clump must have been in use at some point. */
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ return 0;
+ }
+
+ /*
+ * If we get here, we need to zero out those unused nodes.
+ *
+ * We start a transaction and lock the catalog since we're going to be
+ * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
+ * do its writing via the journal, because that would be too much I/O
+ * to fit in a transaction, and it's a pain to break it up into multiple
+ * transactions. (It behaves more like growing a B-tree would.)
+ */
+ printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+ result = hfs_start_transaction(hfsmp);
+ if (result)
+ goto done;
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+ result = BTZeroUnusedNodes(catalog);
+ vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
+ hfs_systemfile_unlock(hfsmp, lockflags);
+ hfs_end_transaction(hfsmp);
+ if (result == 0)
+ hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+ printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
+
+done:
+ return result;
+}
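+
+/*
+ * Illustrative call-site sketch (an assumption, not taken from this
+ * change): code that wants the one-time node repair applied, e.g. during
+ * mount after the journal has been replayed, could simply do:
+ *
+ *     int err = hfs_erase_unused_nodes(hfsmp);
+ *     if (err)
+ *         printf("hfs: could not erase unused nodes (%d)\n", err);
+ *
+ * The call is idempotent: once kHFSUnusedNodeFixMask is set in the volume
+ * attributes, later calls return 0 immediately.
+ */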
+
+
+extern time_t snapshot_timestamp;
+
+int
+check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
+{
+ int snapshot_error = 0;
+
+ if (vp == NULL) {
+ return 0;
+ }
+
+ /* Swap files are special; skip them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+ // the change time is within this epoch
+ int error;
+
+ error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+ if (error == EDEADLK) {
+ snapshot_error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
+ } else if (error == EINTR) {
+ // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
+ snapshot_error = EINTR;
+ }
+ }
+ }
+
+ if (snapshot_error) return snapshot_error;
+
+ return 0;
+}
+
+int
+check_for_dataless_file(struct vnode *vp, uint64_t op_type)
+{
+ int error;
+
+ if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+ // there's nothing to do, it's not dataless
+ return 0;
+ }
+
+ /* Swap files are special; ignore them */
+ if (vnode_isswap(vp)) {
+ return 0;
+ }
+
+ // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
+ error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
+ if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
+ error = 0;
+ } else if (error) {
+ if (error == EAGAIN) {
+ printf("hfs: dataless: timed out waiting for namespace handler...\n");
+ // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
+ return 0;
+ } else if (error == EINTR) {
+ // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
+ return EINTR;
+ }
+ } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
+ //
+ // if we're here, the dataless bit is still set on the file
+ // which means it didn't get handled. we return an error
+ // but it's presently ignored by all callers of this function.
+ //
+ // XXXdbg - EDATANOTPRESENT is what we really need...
+ //
+ return EBADF;
+ }
+
+ return error;
+}
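+
+/*
+ * Illustrative caller sketch (an assumption, not taken from this change):
+ * an I/O path that may touch a dataless file would typically call this
+ * before doing the actual work, so the namespace handler has a chance to
+ * materialize the data, e.g.:
+ *
+ *     int err = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
+ *     if (err)
+ *         return err;
+ */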
+
+
+//
+// NOTE: this function takes care of starting a transaction and
+// acquiring the systemfile lock so that it can call
+// cat_update().
+//
+// NOTE: do NOT hold any cnode locks while calling this function
+// to avoid deadlocks (because we take a lock on the root
+// cnode)
+//
+int
+hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
+{
+ struct vnode *rvp;
+ struct cnode *cp;
+ int error;
+
+ error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
+ if (error) {
+ return error;
+ }
+
+ cp = VTOC(rvp);
+ if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
+ /* Drop the iocount taken by VFS_ROOT before bailing out */
+ vnode_put(rvp);
+ return error;
+ }
+ struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
+
+ int lockflags;
+ if ((error = hfs_start_transaction(hfsmp)) != 0) {
+ /* Undo the root cnode lock and iocount before returning the error */
+ hfs_unlock(cp);
+ vnode_put(rvp);
+ return error;
+ }
+ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+ if (extinfo->document_id == 0) {
+ // initialize this to start at 3 (one greater than the root-dir id)
+ extinfo->document_id = 3;
+ }
+
+ *docid = extinfo->document_id++;
+
+ // mark the root cnode dirty
+ cp->c_flag |= C_MODIFIED;
+ hfs_update(cp->c_vp, 0);
+
+ hfs_systemfile_unlock (hfsmp, lockflags);
+ (void) hfs_end_transaction(hfsmp);
+
+ (void) hfs_unlock(cp);
+
+ vnode_put(rvp);
+ rvp = NULL;
+
+ return 0;
+}
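+
+/*
+ * Illustrative caller sketch (an assumption, not taken from this change).
+ * Per the notes above, the caller must not hold any cnode locks:
+ *
+ *     uint32_t docid = 0;
+ *     int err = hfs_generate_document_id(hfsmp, &docid);
+ *     if (err == 0) {
+ *         // stamp docid into the target item's extended Finder info
+ *     }
+ */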
+
+
+/*
+ * Return information about number of file system allocation blocks
+ * taken by metadata on a volume.
+ *
+ * This function populates struct hfsinfo_metadata with allocation blocks
+ * used by extents overflow btree, catalog btree, bitmap, attribute btree,
+ * journal file, and sum of all of the above.
+ */
+int
+hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
+{
+ int lockflags = 0;
+ int ret_lockflags = 0;
+
+ /* Zero out the output buffer */
+ bzero(hinfo, sizeof(struct hfsinfo_metadata));
+
+ /*
+ * Getting number of allocation blocks for all btrees
+ * should be a quick operation, so we grab locks for
+ * all of them at the same time
+ */
+ lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
+ ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+ /*
+ * Make sure that we were able to acquire all locks requested
+ * to protect us against conditions like unmount in progress.
+ */
+ if ((lockflags & ret_lockflags) != lockflags) {
+ /* Release any locks that were acquired */
+ hfs_systemfile_unlock(hfsmp, ret_lockflags);
+ return EPERM;
+ }
+
+ /* Get information about all the btrees */
+ hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
+ hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
+ hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
+ hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
+
+ /* Done with btrees, give up the locks */
+ hfs_systemfile_unlock(hfsmp, ret_lockflags);
+
+ /* Get information about journal file */
+ hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
+
+ /* Calculate total number of metadata blocks */
+ hinfo->total = hinfo->extents + hinfo->catalog +
+ hinfo->allocation + hinfo->attribute +
+ hinfo->journal;
+
+ return 0;
+}
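+
+/*
+ * Illustrative usage sketch (an assumption, not taken from this change),
+ * e.g. from an fsctl handler that reports metadata usage in bytes:
+ *
+ *     struct hfsinfo_metadata hinfo;
+ *     int err = hfs_getinfo_metadata_blocks(hfsmp, &hinfo);
+ *     if (err == 0) {
+ *         uint64_t bytes = (uint64_t)hinfo.total * hfsmp->blockSize;
+ *         printf("hfs: %llu bytes of metadata\n", (unsigned long long)bytes);
+ *     }
+ */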
+
+static int
+hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
+{
+ vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
+
+ return 0;
+}
+
+__private_extern__
+int hfs_freeze(struct hfsmount *hfsmp)
+{
+ // First make sure some other process isn't freezing
+ hfs_lock_mount(hfsmp);
+ while (hfsmp->hfs_freeze_state != HFS_THAWED) {
+ if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+ PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
+ hfs_unlock_mount(hfsmp);
+ return EINTR;
+ }
+ }
+
+ // Stop new syncers from starting
+ hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
+
+ // Now wait for all syncers to finish
+ while (hfsmp->hfs_syncers) {
+ if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
+ PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
+ hfs_thaw_locked(hfsmp);
+ hfs_unlock_mount(hfsmp);
+ return EINTR;
+ }
+ }
+ hfs_unlock_mount(hfsmp);
+
+ // flush things before we get started to try and prevent
+ // dirty data from being paged out while we're frozen.
+ // note: we can't do this once we're in the freezing state because
+ // other threads will need to take the global lock
+ vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
+
+ // Block everything in hfs_lock_global now
+ hfs_lock_mount(hfsmp);
+ hfsmp->hfs_freeze_state = HFS_FREEZING;
+ hfsmp->hfs_freezing_thread = current_thread();
+ hfs_unlock_mount(hfsmp);
+
+ /* Take the exclusive lock to flush out anything else that
+ might have the global lock at the moment and also so we
+ can flush the journal. */
+ hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
+ journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
+ hfs_unlock_global(hfsmp);
+
+ // don't need to iterate on all vnodes, we just need to
+ // wait for writes to the system files and the device vnode
+ //
+ // Now that journal flush waits for all metadata blocks to
+ // be written out, waiting for btree writes is probably no
+ // longer required.
+ if (HFSTOVCB(hfsmp)->extentsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
+ if (HFSTOVCB(hfsmp)->catalogRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
+ if (HFSTOVCB(hfsmp)->allocationsRefNum)
+ vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
+ if (hfsmp->hfs_attribute_vp)
+ vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
+ vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
+
+ // We're done, mark frozen
+ hfs_lock_mount(hfsmp);
+ hfsmp->hfs_freeze_state = HFS_FROZEN;
+ hfsmp->hfs_freezing_proc = current_proc();
+ hfs_unlock_mount(hfsmp);
+
+ return 0;
+}
+
+__private_extern__
+int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
+{
+ hfs_lock_mount(hfsmp);
+
+ if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
+ hfs_unlock_mount(hfsmp);
+ return EINVAL;
+ }
+ if (process && hfsmp->hfs_freezing_proc != process) {
+ hfs_unlock_mount(hfsmp);
+ return EPERM;
+ }
+
+ hfs_thaw_locked(hfsmp);
+
+ hfs_unlock_mount(hfsmp);
+
+ return 0;
+}
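+
+/*
+ * Illustrative freeze/thaw pairing sketch (an assumption, not taken from
+ * this change), e.g. from an fsctl handler taking a device snapshot:
+ *
+ *     int err = hfs_freeze(hfsmp);
+ *     if (err == 0) {
+ *         // ... capture the snapshot while the volume is frozen ...
+ *         err = hfs_thaw(hfsmp, current_proc());
+ *     }
+ */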
+
+static void hfs_thaw_locked(struct hfsmount *hfsmp)
+{
+ hfsmp->hfs_freezing_proc = NULL;
+ hfsmp->hfs_freeze_state = HFS_THAWED;
+
+ wakeup(&hfsmp->hfs_freeze_state);
+}