xnu-1699.32.7.tar.gz

[apple/xnu.git] / bsd / hfs / hfs_vfsutils.c
diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c

index ce577ec74165a91d876ad56cfc7e3aef5e61b3b1..1032324317b75165d25011e572e18ecc282272ef 100644 (file)
--- a/bsd/hfs/hfs_vfsutils.c
+++ b/bsd/hfs/hfs_vfsutils.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -38,6 +38,7 @@
  #include <sys/malloc.h>
  #include <sys/stat.h>
  #include <sys/mount.h>
+#include <sys/mount_internal.h>
  #include <sys/buf.h>
  #include <sys/buf_internal.h>
  #include <sys/ubc.h>
@@ -45,7 +46,9 @@
  #include <sys/utfconv.h>
  #include <sys/kauth.h>
  #include <sys/fcntl.h>
+#include <sys/fsctl.h>
  #include <sys/vnode_internal.h>
+#include <kern/clock.h>
  
  #include <libkern/OSAtomic.h>
  
@@ -64,9 +67,10 @@
  static void ReleaseMetaFileVNode(struct vnode *vp);
  static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
  
-static void hfs_metadatazone_init(struct hfsmount *);
  static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
  
+#define HFS_MOUNT_DEBUG 1
+
  
  //*******************************************************************************
  // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
@@ -86,7 +90,6 @@ unsigned char hfs_attrname[] = "Attribute B-tree";
  unsigned char hfs_startupname[] = "Startup File";
  
  
-__private_extern__
  OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
                 __unused struct proc *p)
  {
@@ -96,6 +99,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         struct cat_desc cndesc;
         struct cat_attr cnattr;
         struct cat_fork fork;
+       int newvnode_flags = 0;
  
         /* Block size must be a multiple of 512 */
         if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
@@ -114,7 +118,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
          *
          */
         vcb->vcbSigWord         = SWAP_BE16 (mdb->drSigWord);
-       vcb->vcbCrDate          = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
+       vcb->hfs_itime          = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
         vcb->localCreateDate    = SWAP_BE32 (mdb->drCrDate);
         vcb->vcbLsMod           = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
         vcb->vcbAtrb            = SWAP_BE16 (mdb->drAtrb);
@@ -143,8 +147,13 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
          * When an HFS name cannot be encoded with the current
          * volume encoding we use MacRoman as a fallback.
          */
-       if (error || (utf8chars == 0))
-               (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
+       if (error || (utf8chars == 0)) {
+               error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
+               /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
+               if (error) {
+                       goto MtVolErr;
+               }
+       }
  
         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
         vcb->vcbVBMIOSize = kHFSBlockSize;
@@ -179,11 +188,19 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         cnattr.ca_blocks = fork.cf_blocks;
  
         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-                               &hfsmp->hfs_extents_vp);
-       if (error) goto MtVolErr;
+                               &hfsmp->hfs_extents_vp, &newvnode_flags);
+       if (error) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
+               }
+               goto MtVolErr;
+       }
         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                          (KeyCompareProcPtr)CompareExtentKeys));
         if (error) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
+               }
                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
         }
@@ -208,14 +225,20 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         cnattr.ca_blocks = fork.cf_blocks;
  
         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-                               &hfsmp->hfs_catalog_vp);
+                               &hfsmp->hfs_catalog_vp, &newvnode_flags);
         if (error) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
+               }
                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
         }
         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                          (KeyCompareProcPtr)CompareCatalogKeys));
         if (error) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
+               }
                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
@@ -232,8 +255,11 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         cnattr.ca_blocks = 0;
  
         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-                                &hfsmp->hfs_allocation_vp);
+                                &hfsmp->hfs_allocation_vp, &newvnode_flags);
         if (error) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
+               }
                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
@@ -243,34 +269,36 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         /* mark the volume dirty (clear clean unmount bit) */
         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
  
-    if (error == noErr)
-      {
+    if (error == noErr) {
                 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
-      }
-
-    if ( error == noErr )
-      {
-        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )            //      if the disk is not write protected
-          {
-            MarkVCBDirty( vcb );                                                               //      mark VCB dirty so it will be written
-          }
-      }
-
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
+               }
+       }
+       
+    if (error == noErr) {
+               /* If the disk isn't write protected.. */
+        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
+            MarkVCBDirty (vcb); //     mark VCB dirty so it will be written
+               }
+       }
+       
         /*
          * all done with system files so we can unlock now...
          */
         hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
         hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
         hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
-
-    goto       CmdDone;
+       
+       if (error == noErr) {
+               /* If successful, then we can just return once we've unlocked the cnodes */
+               return error;
+       }
  
      //--       Release any resources allocated so far before exiting with an error:
  MtVolErr:
-       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+       hfsUnmount(hfsmp, NULL);
  
-CmdDone:
      return (error);
  }
  
@@ -280,7 +308,6 @@ CmdDone:
  //
  //*******************************************************************************
  
-__private_extern__
  OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
  {
@@ -293,8 +320,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         struct BTreeInfoRec btinfo;
         u_int16_t  signature;
         u_int16_t  hfs_version;
+       int newvnode_flags = 0;
         int  i;
         OSErr retval;
+       char converted_volname[256];
+       size_t volname_length = 0;
+       size_t conv_volname_length = 0;
  
         signature = SWAP_BE16(vhp->signature);
         hfs_version = SWAP_BE16(vhp->version);
@@ -316,23 +347,38 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 /* Removed printf for invalid HFS+ signature because it gives
                  * false error for UFS root volume 
                  */
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: unknown Volume Signature\n");
+               }
                 return (EINVAL);
         }
  
         /* Block size must be at least 512 and a power of 2 */
         blockSize = SWAP_BE32(vhp->blockSize);
-       if (blockSize < 512 || !powerof2(blockSize))
+       if (blockSize < 512 || !powerof2(blockSize)) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
+               }
                 return (EINVAL);
+       }
     
         /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
-           (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0)
+           (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
+               }
                 return (EINVAL);
+       }
  
         /* Make sure we can live with the physical block size. */
         if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
             (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
             (blockSize < hfsmp->hfs_logical_block_size)) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", 
+                                       blockSize, hfsmp->hfs_logical_block_size);
+               }
                 return (ENXIO);
         }
  
@@ -437,9 +483,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
         }
         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &hfsmp->hfs_extents_vp);
+                                &hfsmp->hfs_extents_vp, &newvnode_flags);
         if (retval)
         {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
+               }
                 goto ErrorExit;
         }
         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
@@ -449,6 +498,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                           (KeyCompareProcPtr) CompareExtentKeysPlus));
         if (retval)
         {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
+               }
                 goto ErrorExit;
         }
         /*
@@ -470,8 +522,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
         }
         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &hfsmp->hfs_catalog_vp);
+                                &hfsmp->hfs_catalog_vp, &newvnode_flags);
         if (retval) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
+               }
                 goto ErrorExit;
         }
         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
@@ -480,6 +535,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                           (KeyCompareProcPtr) CompareExtendedCatalogKeys));
         if (retval) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
+               }
                 goto ErrorExit;
         }
         if ((hfsmp->hfs_flags & HFS_X) &&
@@ -511,8 +569,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
         }
         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &hfsmp->hfs_allocation_vp);
+                                &hfsmp->hfs_allocation_vp, &newvnode_flags);
         if (retval) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
+               }
                 goto ErrorExit;
         }
         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
@@ -538,8 +599,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                         SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
                 }
                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-                                        &hfsmp->hfs_attribute_vp);
+                                        &hfsmp->hfs_attribute_vp, &newvnode_flags);
                 if (retval) {
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
+                       }
                         goto ErrorExit;
                 }
                 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
@@ -547,6 +611,22 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                                   (KeyCompareProcPtr) hfs_attrkeycompare));
                 if (retval) {
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
+                       }
+                       goto ErrorExit;
+               }
+
+               /* Initialize vnode for virtual attribute data file that spans the 
+                * entire file system space for performing I/O to attribute btree
+                * We hold iocount on the attrdata vnode for the entire duration 
+                * of mount (similar to btree vnodes)
+                */
+               retval = init_attrdata_vnode(hfsmp);
+               if (retval) {
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
+                       }
                         goto ErrorExit;
                 }
         }
@@ -571,8 +651,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                         SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
                 }
                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-                                        &hfsmp->hfs_startup_vp);
+                                        &hfsmp->hfs_startup_vp, &newvnode_flags);
                 if (retval) {
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
+                       }
                         goto ErrorExit;
                 }
                 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
@@ -582,13 +665,29 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         /* Pick up volume name and create date */
         retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
         if (retval) {
+               if (HFS_MOUNT_DEBUG) {
+                       printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
+               }
                 goto ErrorExit;
         }
-       vcb->vcbCrDate = cnattr.ca_itime;
+       vcb->hfs_itime = cnattr.ca_itime;
         vcb->volumeNameEncodingHint = cndesc.cd_encoding;
         bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
+       volname_length = strlen ((const char*)vcb->vcbVN);
         cat_releasedesc(&cndesc);
+       
+#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
  
+
+       /* Send the volume name down to CoreStorage if necessary */     
+       retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
+       if (retval == 0) {
+               (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+       }       
+       
+       /* Reset retval to 0; we don't care about errors in volname conversion. */
+       retval = 0;
+       
         /* mark the volume dirty (clear clean unmount bit) */
         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
         if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
@@ -612,6 +711,17 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
  
                 retval = hfs_late_journal_init(hfsmp, vhp, args);
                 if (retval != 0) {
+                       if (retval == EROFS) {
+                               // EROFS is a special error code that means the volume has an external
+                               // journal which we couldn't find.  in that case we do not want to
+                               // rewrite the volume header - we'll just refuse to mount the volume.
+                               if (HFS_MOUNT_DEBUG) {
+                                       printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
+                               }
+                               retval = EINVAL;
+                               goto ErrorExit;
+                       }
+
                         hfsmp->jnl = NULL;
                         
                         // if the journal failed to open, then set the lastMountedVersion
@@ -626,6 +736,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                     
                                 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
  
+                               bp = NULL;
                                 retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
                                                 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                                                 hfsmp->hfs_physical_block_size, cred, &bp);
@@ -646,7 +757,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                         bp = NULL;
                             }
                         }
-
+                       
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
+                       }
                         retval = EINVAL;
                         goto ErrorExit;
                 } else if (hfsmp->jnl) {
@@ -680,7 +794,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         /*
          * Establish a metadata allocation zone.
          */
-       hfs_metadatazone_init(hfsmp);
+       hfs_metadatazone_init(hfsmp, false);
  
         /*
          * Make any metadata zone adjustments.
@@ -691,7 +805,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                     vcb->nextAllocation <= hfsmp->hfs_metazone_end) {       
                         HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
                 }
+       } else {
+               if (vcb->nextAllocation <= 1) {
+                       vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
+               }
         }
+       vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
  
         /* Setup private/hidden directories for hardlinks. */
         hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
@@ -700,6 +819,19 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) 
                 hfs_remove_orphans(hfsmp);
  
+       /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
+       {
+               retval = hfs_erase_unused_nodes(hfsmp);
+               if (retval) {
+                       if (HFS_MOUNT_DEBUG) {
+                               printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
+                       }
+
+                       goto ErrorExit;
+               }
+       }
+       
         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
         {
                 MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
@@ -709,29 +841,33 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
          * Allow hot file clustering if conditions allow.
          */
         if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
-           ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
+           ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
                 (void) hfs_recording_init(hfsmp);
         }
  
         /* Force ACLs on HFS+ file systems. */
         vfs_setextendedsecurity(HFSTOVFS(hfsmp));
  
-       /* Check if volume supports writing of extent-based extended attributes */
-       hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);
+       /* Enable extent-based extended attributes by default */
+       hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
+
+       /* See if this volume should have per-file content protection enabled */
+       if (vcb->vcbAtrb & kHFSContentProtectionMask) {
+               vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
+       }
  
         return (0);
  
  ErrorExit:
         /*
-        * A fatal error occurred and the volume cannot be mounted
-        * release any resources that we aquired...
+        * A fatal error occurred and the volume cannot be mounted, so 
+        * release any resources that we acquired...
          */
-       if (hfsmp->hfs_attribute_vp)
-               ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
-       ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
-       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
-
+       hfsUnmount(hfsmp, NULL);
+               
+       if (HFS_MOUNT_DEBUG) {
+               printf("hfs_mounthfsplus: encountered errorr (%d)\n", retval);
+       }
         return (retval);
  }
  
@@ -766,44 +902,47 @@ static void ReleaseMetaFileVNode(struct vnode *vp)
  *
  *************************************************************/
  
-__private_extern__
  int
  hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
  {
-       /* Get rid of our attribute data vnode (if any). */
+       /* Get rid of our attribute data vnode (if any).  This is done 
+        * after the vflush() during mount, so we don't need to worry 
+        * about any locks.
+        */
         if (hfsmp->hfs_attrdata_vp) {
-               vnode_t advp = hfsmp->hfs_attrdata_vp;
-       
-               if (vnode_get(advp) == 0) {
-                       vnode_rele_ext(advp, O_EVTONLY, 0);
-                       vnode_put(advp);
-               }
+               ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
                 hfsmp->hfs_attrdata_vp = NULLVP;
         }
  
-       if (hfsmp->hfs_startup_vp)
+       if (hfsmp->hfs_startup_vp) {
                 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
-
-       if (hfsmp->hfs_allocation_vp)
-               ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
-
-       if (hfsmp->hfs_attribute_vp)
+               hfsmp->hfs_startup_cp = NULL;
+               hfsmp->hfs_startup_vp = NULL;
+       }
+       
+       if (hfsmp->hfs_attribute_vp) {
                 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
+               hfsmp->hfs_attribute_cp = NULL;
+               hfsmp->hfs_attribute_vp = NULL;
+       }
  
-       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+       if (hfsmp->hfs_catalog_vp) {
+               ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
+               hfsmp->hfs_catalog_cp = NULL;
+               hfsmp->hfs_catalog_vp = NULL;
+       }
  
-       /*
-        * Setting these pointers to NULL so that any references
-        * past this point will fail, and tell us the point of failure.
-        * Also, facilitates a check in hfs_update for a null catalog
-        * vp
-        */
-       hfsmp->hfs_allocation_vp = NULL;
-       hfsmp->hfs_attribute_vp = NULL;
-       hfsmp->hfs_catalog_vp = NULL;
-       hfsmp->hfs_extents_vp = NULL;
-       hfsmp->hfs_startup_vp = NULL;
+       if (hfsmp->hfs_extents_vp) {
+               ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+               hfsmp->hfs_extents_cp = NULL;
+               hfsmp->hfs_extents_vp = NULL;
+       }
+
+       if (hfsmp->hfs_allocation_vp) {
+               ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
+               hfsmp->hfs_allocation_cp = NULL;
+               hfsmp->hfs_allocation_vp = NULL;
+       }
  
         return (0);
  }
@@ -816,7 +955,7 @@ __private_extern__
  int
  overflow_extents(struct filefork *fp)
  {
-       u_long blocks;
+       u_int32_t blocks;
  
         //
         // If the vnode pointer is NULL then we're being called
@@ -849,11 +988,56 @@ overflow_extents(struct filefork *fp)
         return (fp->ff_blocks > blocks);
  }
  
+/*
+ * Lock the HFS global journal lock: shared if locktype is HFS_SHARED_LOCK, exclusive for any other value. Always returns 0.
+ */
+int 
+hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) {
+
+       void *thread = current_thread();
+       /* The owner field holds a thread only after an exclusive acquire (below), so a match means this thread would be locking against itself. */
+       if (hfsmp->hfs_global_lockowner == thread) {
+               panic ("hfs_lock_global: locking against myself!");
+       }
+
+    /* Shared request: take the rw lock shared, then tag the owner field with HFS_SHARED_OWNER */
+       if (locktype == HFS_SHARED_LOCK) {
+               lck_rw_lock_shared (&hfsmp->hfs_global_lock);
+               hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
+       }
+    /* Anything else is treated as HFS_EXCLUSIVE_LOCK: take the rw lock exclusive, then record this thread as owner */
+       else {
+               lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
+               hfsmp->hfs_global_lockowner = thread;
+       }
+
+       return 0;
+}
+
+
+/*
+ * Unlock the HFS global journal lock. The mode is inferred from the owner field: exclusive if it is the current thread, shared otherwise.
+ */
+void 
+hfs_unlock_global (struct hfsmount *hfsmp) {
+       
+       void *thread = current_thread();
+
+    /* Owner is this thread, so the lock is held HFS_EXCLUSIVE_LOCK: clear the owner before dropping the lock */
+       if (hfsmp->hfs_global_lockowner == thread) {
+               hfsmp->hfs_global_lockowner = NULL;
+               lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
+       }
+    /* Otherwise treat it as held HFS_SHARED_LOCK; the owner field is left untouched on this path */
+       else {
+               lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
+       }
+}
+
  
  /*
   * Lock HFS system file(s).
   */
-__private_extern__
  int
  hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
  {
@@ -874,7 +1058,12 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
                 }
  #endif /* HFS_CHECK_LOCK_ORDER */
  
-               (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
+               if (hfsmp->hfs_catalog_cp) {
+                       (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
+               } else {
+                       flags &= ~SFL_CATALOG;
+               }
+
                 /*
                  * When the catalog file has overflow extents then
                  * also acquire the extents b-tree lock if its not
@@ -918,7 +1107,12 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
                 }
  #endif /* HFS_CHECK_LOCK_ORDER */
  
-               (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
+               if (hfsmp->hfs_startup_cp) {
+                       (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
+               } else {
+                       flags &= ~SFL_STARTUP;
+               }
+
                 /*
                  * When the startup file has overflow extents then
                  * also acquire the extents b-tree lock if its not
@@ -935,17 +1129,14 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
          */
         if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
                 /*
-                * Since the only bitmap operations are clearing and
-                * setting bits we always need exclusive access. And
-                * when we have a journal, we can "hide" behind that
-                * lock since we can only change the bitmap from
-                * within a transaction.
+                * If there's no bitmap cnode, ignore the bitmap lock.
                  */
-               if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) {
+               if (hfsmp->hfs_allocation_cp == NULL) {
                         flags &= ~SFL_BITMAP;
                 } else {
                         (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
-                       /* The bitmap lock is also grabbed when only extent lock 
+                       /* 
+                        * The bitmap lock is also grabbed when only extent lock 
                          * was requested. Set the bitmap lock bit in the lock
                          * flags which callers will use during unlock.
                          */
@@ -957,7 +1148,11 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
                  * Since the extents btree lock is recursive we always
                  * need exclusive access.
                  */
-               (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
+               if (hfsmp->hfs_extents_cp) {
+                       (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
+               } else {
+                       flags &= ~SFL_EXTENTS;
+               }
         }
         return (flags);
  }
@@ -965,7 +1160,6 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
  /*
   * unlock HFS system file(s).
   */
-__private_extern__
  void
  hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
  {
@@ -992,7 +1186,7 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
                 }
                 hfs_unlock(hfsmp->hfs_attribute_cp);
         }
-       if (flags & SFL_CATALOG) {
+       if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
                 if (hfsmp->jnl == NULL) {
                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
                         numOfLockedBuffs = count_lock_queue();
@@ -1004,10 +1198,10 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
                 }
                 hfs_unlock(hfsmp->hfs_catalog_cp);
         }
-       if (flags & SFL_BITMAP) {
+       if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
                 hfs_unlock(hfsmp->hfs_allocation_cp);
         }
-       if (flags & SFL_EXTENTS) {
+       if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
                 if (hfsmp->jnl == NULL) {
                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
                         numOfLockedBuffs = count_lock_queue();
@@ -1044,20 +1238,20 @@ void RequireFileLock(FileReference vp, int shareable)
         if (!locked && !shareable) {
                 switch (VTOC(vp)->c_fileid) {
                 case kHFSExtentsFileID:
-                       panic("extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
                         break;
                 case kHFSCatalogFileID:
-                       panic("catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
                         break;
                 case kHFSAllocationFileID:
                         /* The allocation file can hide behind the jornal lock. */
                         if (VTOHFS(vp)->jnl == NULL)
-                               panic("allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
+                               panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
                         break;
                 case kHFSStartupFileID:
-                       panic("startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
                 case kHFSAttributesFileID:
-                       panic("attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
                         break;
                 }
         }
@@ -1094,15 +1288,15 @@ hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
  }
  
  
-unsigned long BestBlockSizeFit(unsigned long allocationBlockSize,
-                               unsigned long blockSizeLimit,
-                               unsigned long baseMultiple) {
+u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
+                               u_int32_t blockSizeLimit,
+                               u_int32_t baseMultiple) {
      /*
         Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
         specified limit but still an even multiple of the baseMultiple.
       */
      int baseBlockCount, blockCount;
-    unsigned long trialBlockSize;
+    u_int32_t trialBlockSize;
  
      if (allocationBlockSize % baseMultiple != 0) {
          /*
@@ -1137,8 +1331,7 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize,
  }
  
  
-__private_extern__
-u_long
+u_int32_t
  GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
                         struct cat_attr *fattr, struct cat_fork *forkinfo)
  {
@@ -1177,7 +1370,6 @@ GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
   * If the volume was not cleanly unmounted then some of these may
   * have persisted and need to be removed.
   */
-__private_extern__
  void
  hfs_remove_orphans(struct hfsmount * hfsmp)
  {
@@ -1197,7 +1389,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
         int started_tr = 0;
         int lockflags;
         int result;
-       int orphanedlinks = 0;
+       int orphaned_files = 0;
+       int orphaned_dirs = 0;
  
         bzero(&cookie, sizeof(cookie));
  
@@ -1254,8 +1447,9 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                  */
                 if (bcmp(tempname, filename, namelen) == 0) {
                         struct filefork dfork;
-                       struct filefork rfork;
+               struct filefork rfork;
                         struct cnode cnode;
+                       int mode = 0;
  
                         bzero(&dfork, sizeof(dfork));
                         bzero(&rfork, sizeof(rfork));
@@ -1312,8 +1506,10 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                                 fsize = 0;
                                         }
  
-                                       if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
-                                               printf("error truncting data fork!\n");
+                                       if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, 
+                                                                         cnode.c_attr.ca_fileid, false) != 0) {
+                                               printf("hfs: error truncating data fork!\n");
+
                                                 break;
                                         }
  
@@ -1344,8 +1540,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                 rfork.ff_cp = &cnode;
                                 cnode.c_datafork = NULL;
                                 cnode.c_rsrcfork = &rfork;
-                               if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
-                                       printf("error truncting rsrc fork!\n");
+                               if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
+                                       printf("hfs: error truncating rsrc fork!\n");
                                         break;
                                 }
                         }
@@ -1358,11 +1554,19 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                                 break;
                         }
-                       ++orphanedlinks;
+                       
+                       mode = cnode.c_attr.ca_mode & S_IFMT;
+
+                       if (mode == S_IFDIR) {
+                               orphaned_dirs++;
+                       }
+                       else {
+                               orphaned_files++;
+                       }
  
                         /* Update parent and volume counts */   
                         hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-                       if (cnode.c_attr.ca_mode & S_IFDIR) {
+                       if (mode == S_IFDIR) {
                                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
                         }
  
@@ -1378,7 +1582,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                            Now that Catalog is unlocked, update the volume info, making
                            sure to differentiate between files and directories
                         */
-                       if (cnode.c_attr.ca_mode & S_IFDIR) {
+                       if (mode == S_IFDIR) {
                                 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
                         }
                         else{
@@ -1392,8 +1596,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
  
                 } /* end if */
         } /* end for */
-       if (orphanedlinks > 0)
-               printf("HFS: Removed %d orphaned unlinked files or directories \n", orphanedlinks);
+       if (orphaned_files > 0 || orphaned_dirs > 0)
+               printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
  exit:
         if (catlock) {
                 hfs_systemfile_unlock(hfsmp, lockflags);
@@ -1451,7 +1655,6 @@ u_int32_t logBlockSize;
         return logBlockSize;    
  }
  
-__private_extern__
  u_int32_t
  hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
  {
@@ -1462,7 +1665,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
         /*
          * We don't bother taking the mount lock
          * to look at these values since the values
-        * themselves are each updated automically
+        * themselves are each updated atomically
          * on aligned addresses.
          */
         freeblks = hfsmp->freeBlocks;
@@ -1479,7 +1682,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
         else
                 freeblks = 0;
  
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
         /* 
          * When the underlying device is sparse, check the
          * available space on the backing store volume.
@@ -1518,6 +1721,9 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
                         else
                                 vfreeblks = 0;
  
+                       if (hfsmp->hfs_backingfs_maxblocks) {
+                               vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
+                       }
                         freeblks = MIN(vfreeblks, freeblks);
                         HFS_MOUNT_UNLOCK(hfsmp, TRUE);
                 }
@@ -1772,7 +1978,132 @@ out:
  }
  
  
-__private_extern__
+typedef struct jopen_cb_info {   /* state shared between open_journal_dev() and journal_open_cb() */
+       off_t   jsize;           /* in: journal size handed to journal_is_clean() */
+       char   *desired_uuid;    /* in/out: uuid to match; if it is an empty string the callback copies in the uuid of the device it accepts */
+        struct  vnode *jvp;     /* out: vnode of the opened journal device, NULL when no device was accepted */
+       size_t  blksize;         /* in: block size handed to journal_is_clean() */
+       int     need_clean;      /* in: nonzero => run journal_is_clean() on candidates when no uuid was requested */
+       int     need_init;       /* out: set to 1 when journal_is_clean() returns EINVAL */
+} jopen_cb_info;
+/* Per-device callback handed to IOBSDIterateMediaWithContent(): tries to open /dev/<bsd_dev_name> as the external journal; returns 0 to stop iterating (ji->jvp is set) or 1 to keep iterating. */
+static int
+journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
+{
+       struct nameidata nd;
+       jopen_cb_info *ji = (jopen_cb_info *)arg;
+       char bsd_name[256];
+       int error;
+       
+       strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));   /* build "/dev/<bsd_dev_name>" ... */
+       strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);   /* ... 5 == length of the "/dev/" prefix */
+
+       if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {   /* a specific uuid was requested and this device is not it */
+               return 1;   // keep iterating
+       }
+
+       // if we're here, either the desired uuid matched or there was no
+       // desired uuid so let's try to open the device for writing and
+       // see if it works.  if it does, we'll use it.
+       
+       NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
+       if ((error = namei(&nd))) {
+               printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
+               return 1;   // keep iterating
+       }
+
+       ji->jvp = nd.ni_vp;   /* remember the vnode that the lookup produced */
+       nameidone(&nd);
+
+       if (ji->jvp == NULL) {
+               printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);   /* error is 0 on this path: namei() succeeded above */
+       } else {
+               error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
+               if (error == 0) {
+                       // if the journal is dirty and we didn't specify a desired
+                       // journal device uuid, then do not use the journal.  but
+                       // if the journal is just invalid (e.g. it hasn't been
+                       // initialized) then just set the need_init flag.
+                       if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
+                               error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);   /* NOTE(review): (void *)1 sits where other callers in this file pass devvp -- confirm it is only a non-NULL placeholder */
+                               if (error == EBUSY) {   /* presumably EBUSY is the "journal is dirty" result described above -- confirm */
+                                       VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
+                                       vnode_put(ji->jvp);
+                                       ji->jvp = NULL;   /* reject this device so the caller does not see a stale vnode */
+                                       return 1;    // keep iterating
+                               } else if (error == EINVAL) {
+                                       ji->need_init = 1;
+                               }
+                       }
+
+                       if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {   /* no uuid was requested: report back the one we picked */
+                               strlcpy(ji->desired_uuid, uuid_str, 128);   /* NOTE(review): 128 is a hard-coded bound, not derived from the destination buffer -- confirm it does not exceed the caller's uuid field */
+                       }
+                       vnode_setmountedon(ji->jvp);
+                       // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
+                       return 0;   // stop iterating
+               } else {
+                       vnode_put(ji->jvp);   /* open failed: release the vnode and fall through to "keep iterating" */
+                       ji->jvp = NULL;
+               }
+       }
+
+       return 1;   // keep iterating
+}
+/* Prototypes for routines not defined in this part of the file; the media iterator and the serial-number getter are called by the journal setup code below. */
+extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
+extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
+kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
+
+/* Locate and open an external journal device by iterating media with journal_open_cb(); returns the opened vnode, or NULL if none was found after all retries. */
+static vnode_t
+open_journal_dev(const char *vol_device,   /* volume device name; used only in log messages */
+                int need_clean,            /* forwarded to the callback as ji.need_clean */
+                char *uuid_str,            /* in/out: wanted journal uuid, "" for any; the callback may fill it in. Dereferenced below without a NULL check */
+                char *machine_serial_num,  /* used only in the failure log message */
+                off_t jsize,               /* journal size, forwarded to the callback */
+                size_t blksize,            /* block size, forwarded to the callback */
+                int *need_init)            /* out: receives ji.need_init as left by the callback */
+{
+    int retry_counter=0;
+    jopen_cb_info ji;
+
+    ji.jsize        = jsize;
+    ji.desired_uuid = uuid_str;
+    ji.jvp          = NULL;
+    ji.blksize      = blksize;
+    ji.need_clean   = need_clean;
+    ji.need_init    = 0;
+
+//    if (uuid_str[0] == '\0') {
+//         printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
+//    } else {
+//         printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
+//    }
+    while (ji.jvp == NULL && retry_counter++ < 4) {   /* at most 4 passes over the media list */
+           if (retry_counter > 1) {   /* every pass after the first is preceded by a log line and a delay */
+                   if (uuid_str[0]) {
+                           printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
+                   } else {
+                           printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
+                   }
+                   delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
+           }
+
+           IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);   /* the callback sets ji.jvp when it accepts a device */
+    }
+
+    if (ji.jvp == NULL) {
+           printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
+                  vol_device, uuid_str, machine_serial_num);
+    }
+
+    *need_init = ji.need_init;   /* reported even when no device was found (then it is still 0 unless a callback set it) */
+
+    return ji.jvp;
+}
+
+
  int
  hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                            void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
@@ -1781,15 +2112,20 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         JournalInfoBlock *jibp;
         struct buf       *jinfo_bp, *bp;
         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
-       int               retval;
+       int               retval, write_jibp = 0;
         uint32_t                  blksize = hfsmp->hfs_logical_block_size;
         struct vnode     *devvp;
         struct hfs_mount_args *args = _args;
         u_int32_t         jib_flags;
         u_int64_t         jib_offset;
         u_int64_t         jib_size;
+       const char *dev_name;
         
         devvp = hfsmp->hfs_devvp;
+       dev_name = vnode_name(devvp);
+       if (dev_name == NULL) {
+               dev_name = "unknown-dev";
+       }
  
         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
                 arg_flags  = args->journal_flags;
@@ -1798,24 +2134,56 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
  
         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
                                 
+       jinfo_bp = NULL;
         retval = (int)buf_meta_bread(devvp,
                                                 (daddr64_t)((embeddedOffset/blksize) + 
-                                               (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
-                                               SWAP_BE32(vhp->blockSize), cred, &jinfo_bp);
-       if (retval)
+                                               ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
+                                               hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
+       if (retval) {
+               if (jinfo_bp) {
+                       buf_brelse(jinfo_bp);
+               }
                 return retval;
-
+       }
+       
         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
         jib_flags  = SWAP_BE32(jibp->flags);
-       jib_offset = SWAP_BE64(jibp->offset);
         jib_size   = SWAP_BE64(jibp->size);
  
         if (jib_flags & kJIJournalInFSMask) {
                 hfsmp->jvp = hfsmp->hfs_devvp;
+               jib_offset = SWAP_BE64(jibp->offset);
         } else {
-               printf("hfs: journal not stored in fs! don't know what to do.\n");
+           int need_init=0;
+       
+           // if the volume was unmounted cleanly then we'll pick any
+           // available external journal partition
+           //
+           if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
+                   *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
+           }
+
+           hfsmp->jvp = open_journal_dev(dev_name,
+                                         !(jib_flags & kJIJournalNeedInitMask),
+                                         (char *)&jibp->ext_jnl_uuid[0],
+                                         (char *)&jibp->machine_serial_num[0],
+                                         jib_size,
+                                         hfsmp->hfs_logical_block_size,
+                                         &need_init);
+           if (hfsmp->jvp == NULL) {
                 buf_brelse(jinfo_bp);
-               return EINVAL;
+               return EROFS;
+           } else {
+                   if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
+                           strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
+                   }
+           }
+
+           jib_offset = 0;
+           write_jibp = 1;
+           if (need_init) {
+                   jib_flags |= kJIJournalNeedInitMask;
+           }
         }
  
         // save this off for the hack-y check in hfs_remove()
@@ -1827,18 +2195,21 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
             // if it is, then we can allow the mount.  otherwise we have to
             // return failure.
             retval = journal_is_clean(hfsmp->jvp,
-                                     jib_offset + embeddedOffset,
+                                     jib_offset + embeddedOffset,
                                       jib_size,
                                       devvp,
-                                     hfsmp->hfs_logical_block_size);
+                                     hfsmp->hfs_logical_block_size);
  
             hfsmp->jnl = NULL;
  
             buf_brelse(jinfo_bp);
  
             if (retval) {
+               const char *name = vnode_getname(devvp);
               printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
-                     vnode_name(devvp));
+                     name ? name : "");
+               if (name)
+                       vnode_putname(name);
             }
  
             return retval;
@@ -1855,6 +2226,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                                                         arg_flags,
                                                                         arg_tbufsz,
                                                                         hfs_sync_metadata, hfsmp->hfs_mp);
+               if (hfsmp->jnl)
+                       journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
  
                 // no need to start a transaction here... if this were to fail
                 // we'd just re-init it on the next mount.
@@ -1876,8 +2249,14 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                                                   arg_flags,
                                                                   arg_tbufsz,
                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
+               if (hfsmp->jnl)
+                       journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
  
-               buf_brelse(jinfo_bp);
+               if (write_jibp) {
+                       buf_bwrite(jinfo_bp);
+               } else {
+                       buf_brelse(jinfo_bp);
+               }
                 jinfo_bp = NULL;
                 jibp     = NULL;
  
@@ -1887,11 +2266,14 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                         if (mdb_offset == 0) {
                                 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
                         }
+                       bp = NULL;
                         retval = (int)buf_meta_bread(devvp, 
                                         HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                                         hfsmp->hfs_physical_block_size, cred, &bp);
                         if (retval) {
-                               buf_brelse(bp);
+                               if (bp) {
+                                       buf_brelse(bp);
+                               }
                                 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
                                            retval);
                                 return retval;
@@ -1941,7 +2323,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
         struct cat_attr   jib_attr, jattr;
         struct cat_fork   jib_fork, jfork;
         ExtendedVCB      *vcb;
-       u_long            fid;
+       u_int32_t            fid;
         struct hfs_mount_args *args = _args;
         u_int32_t         jib_flags;
         u_int64_t         jib_offset;
@@ -1980,11 +2362,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
  
  
         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
+       jinfo_bp = NULL;
         retval = (int)buf_meta_bread(devvp,
-                                               (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + 
-                                               (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
-                                               SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp);
+                                               (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + 
+                                               ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
+                                               hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
         if (retval) {
+               if (jinfo_bp) {
+                       buf_brelse(jinfo_bp);
+               }
                 printf("hfs: can't read journal info block. disabling journaling.\n");
                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
                 return 0;
@@ -2006,7 +2392,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
         hfsmp->hfs_jnlfileid = fid;
  
         // make sure the journal file begins where we think it should.
-       if ((jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
+       if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
                 printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
                            (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
  
@@ -2027,10 +2413,41 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
         
         if (jib_flags & kJIJournalInFSMask) {
                 hfsmp->jvp = hfsmp->hfs_devvp;
+               jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
         } else {
-               printf("hfs: journal not stored in fs! don't know what to do.\n");
+           const char *dev_name;
+           int need_init = 0;
+
+           dev_name = vnode_name(devvp);
+           if (dev_name == NULL) {
+                   dev_name = "unknown-dev";
+           }
+
+            // since the journal is empty, just use any available external journal
+           *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
+
+           // this fills in the uuid of the device we actually get
+           hfsmp->jvp = open_journal_dev(dev_name,
+                                         !(jib_flags & kJIJournalNeedInitMask),
+                                         (char *)&jibp->ext_jnl_uuid[0],
+                                         (char *)&jibp->machine_serial_num[0],
+                                         jib_size,
+                                         hfsmp->hfs_logical_block_size,
+                                         &need_init);
+           if (hfsmp->jvp == NULL) {
                 buf_brelse(jinfo_bp);
-               return EINVAL;
+               return EROFS;
+           } else {
+                   if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
+                           strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
+                   }
+           } 
+           jib_offset = 0;
+           recreate_journal = 1;
+           write_jibp = 1;
+           if (need_init) {
+                   jib_flags |= kJIJournalNeedInitMask;
+           }
         }
  
         // save this off for the hack-y check in hfs_remove()
@@ -2042,7 +2459,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
             // if it is, then we can allow the mount.  otherwise we have to
             // return failure.
             retval = journal_is_clean(hfsmp->jvp,
-                                     jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
+                                     jib_offset,
                                       jib_size,
                                       devvp,
                                       hfsmp->hfs_logical_block_size);
@@ -2052,8 +2469,11 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
             buf_brelse(jinfo_bp);
  
             if (retval) {
+               const char *name = vnode_getname(devvp);
               printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
-                    vnode_name(devvp));
+                    name ? name : "");
+               if (name)
+                       vnode_putname(name);
             }
  
             return retval;
@@ -2061,15 +2481,17 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
  
         if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
-                          jib_offset + (off_t)vcb->hfsPlusIOPosOffset, jib_size);
+                          jib_offset, jib_size);
                 hfsmp->jnl = journal_create(hfsmp->jvp,
-                                                                       jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
+                                                                       jib_offset,
                                                                         jib_size,
                                                                         devvp,
                                                                         hfsmp->hfs_logical_block_size,
                                                                         arg_flags,
                                                                         arg_tbufsz,
                                                                         hfs_sync_metadata, hfsmp->hfs_mp);
+               if (hfsmp->jnl)
+                       journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
  
                 // no need to start a transaction here... if this were to fail
                 // we'd just re-init it on the next mount.
@@ -2088,17 +2510,19 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                 arg_flags |= JOURNAL_RESET;
                 
                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
-               //         jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
+               //         jib_offset,
                 //         jib_size, SWAP_BE32(vhp->blockSize));
                                 
                 hfsmp->jnl = journal_open(hfsmp->jvp,
-                                                                 jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
+                                                                 jib_offset,
                                                                   jib_size,
                                                                   devvp,
                                                                   hfsmp->hfs_logical_block_size,
                                                                   arg_flags,
                                                                   arg_tbufsz,
                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
+               if (hfsmp->jnl)
+                       journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
         }
                         
  
@@ -2114,7 +2538,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
         jinfo_bp = NULL;
         jibp     = NULL;
  
-       //printf("journal @ 0x%x\n", hfsmp->jnl);
+       //printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
         
         // if we expected the journal to be there and we couldn't
         // create it or open it then we have to bail out.
@@ -2155,8 +2579,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
  #define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
  #define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
  
-static void
-hfs_metadatazone_init(struct hfsmount *hfsmp)
+/* Initialize the metadata zone.
+ *
+ * If the size of the volume is less than the minimum size for
+ * metadata zone, metadata zone is disabled.
+ *
+ * If disable is true, disable metadata zone unconditionally.
+ */
+void
+hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
  {
         ExtendedVCB  *vcb;
         u_int64_t  fs_size;
@@ -2164,58 +2595,78 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         u_int64_t  temp;
         u_int64_t  filesize;
         u_int32_t  blk;
-       int  items;
+       int  items, really_do_it=1;
  
         vcb = HFSTOVCB(hfsmp);
-       fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks;
+       fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
  
         /*
          * For volumes less than 10 GB, don't bother.
          */
-       if (fs_size < ((u_int64_t)10 * GIGABYTE))
-               return;
+       if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
+               really_do_it = 0;
+       }
+       
         /*
          * Skip non-journaled volumes as well.
          */
-       if (hfsmp->jnl == NULL)
-               return;
+       if (hfsmp->jnl == NULL) {
+               really_do_it = 0;
+       }
+
+       /* If caller wants to disable metadata zone, do it */
+       if (disable == true) {
+               really_do_it = 0;
+       }
  
         /*
-        * Start with allocation bitmap (a fixed size).
+        * Start with space for the boot blocks and Volume Header.
+        * 1536 = byte offset from start of volume to end of volume header:
+        * 1024 bytes is the offset from the start of the volume to the
+        * start of the volume header (defined by the volume format)
+        * + 512 bytes (the size of the volume header).
          */
-       zonesize = roundup(vcb->totalBlocks / 8, vcb->vcbVBMIOSize);
-
+       zonesize = roundup(1536, hfsmp->blockSize);
+       
         /*
-        * Overflow Extents file gets 4 MB per 100 GB.
+        * Add the on-disk size of allocation bitmap.
          */
-       items = fs_size / ((u_int64_t)100 * GIGABYTE);
-       filesize = (u_int64_t)(items + 1) * OVERFLOW_DEFAULT_SIZE;
-       if (filesize > OVERFLOW_MAXIMUM_SIZE)
-               filesize = OVERFLOW_MAXIMUM_SIZE;
-       zonesize += filesize;
-       hfsmp->hfs_overflow_maxblks = filesize / vcb->blockSize;
-
+       zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+       
+       /* 
+        * Add space for the Journal Info Block and Journal (if they're in
+        * this file system).
+        */
+       if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
+               zonesize += hfsmp->blockSize + hfsmp->jnl_size;
+       }
+       
         /*
-        * Plan for at least 8 MB of journal for each
-        * 100 GB of disk space (up to a 512 MB).
+        * Add the existing size of the Extents Overflow B-tree.
+        * (It rarely grows, so don't bother reserving additional room for it.)
          */
-       items = fs_size / ((u_int64_t)100 * GIGABYTE);
-       filesize = (u_int64_t)(items + 1) * JOURNAL_DEFAULT_SIZE;
-       if (filesize > JOURNAL_MAXIMUM_SIZE)
-               filesize = JOURNAL_MAXIMUM_SIZE;
-       zonesize += filesize;
-
+       zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+       
         /*
-        * Catalog file gets 10 MB per 1 GB.
-        *
-        * How about considering the current catalog size (used nodes * node size)
-        * and the current file data size to help estimate the required
-        * catalog size.
+        * If there is an Attributes B-tree, leave room for 11 clumps worth.
+        * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
+        * When installing a full OS install onto a 20GB volume, we use
+        * 7 to 8 clumps worth of space (depending on packages), so that leaves
+        * us with another 3 or 4 clumps worth before we need another extent.
          */
-       filesize = MIN((fs_size / 1024) * 10, GIGABYTE);
-       hfsmp->hfs_catalog_maxblks = filesize / vcb->blockSize;
-       zonesize += filesize;
-
+       if (hfsmp->hfs_attribute_cp) {
+               zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
+       }
+       
+       /*
+        * Leave room for 11 clumps of the Catalog B-tree.
+        * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
+        * When installing a full OS install onto a 20GB volume, we use
+        * 7 to 8 clumps worth of space (depending on packages), so that leaves
+        * us with another 3 or 4 clumps worth before we need another extent.
+        */
+       zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
+       
         /*
          * Add space for hot file region.
          *
@@ -2229,38 +2680,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         /*
          * Calculate user quota file requirements.
          */
-       items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
-       if (items < QF_MIN_USERS)
-               items = QF_MIN_USERS;
-       else if (items > QF_MAX_USERS)
-               items = QF_MAX_USERS;
-       if (!powerof2(items)) {
-               int x = items;
-               items = 4;
-               while (x>>1 != 1) {
-                       x = x >> 1;
-                       items = items << 1;
-               }
-       }
-       filesize += (items + 1) * sizeof(struct dqblk);
-       /*
-        * Calculate group quota file requirements.
-        *
-        */
-       items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
-       if (items < QF_MIN_GROUPS)
-               items = QF_MIN_GROUPS;
-       else if (items > QF_MAX_GROUPS)
-               items = QF_MAX_GROUPS;
-       if (!powerof2(items)) {
-               int x = items;
-               items = 4;
-               while (x>>1 != 1) {
-                       x = x >> 1;
-                       items = items << 1;
-               }
-       }
-       filesize += (items + 1) * sizeof(struct dqblk);
+       if (hfsmp->hfs_flags & HFS_QUOTAS) {
+               items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
+               if (items < QF_MIN_USERS)
+                       items = QF_MIN_USERS;
+               else if (items > QF_MAX_USERS)
+                       items = QF_MAX_USERS;
+               if (!powerof2(items)) {
+                       int x = items;
+                       items = 4;
+                       while (x>>1 != 1) {
+                               x = x >> 1;
+                               items = items << 1;
+                       }
+               }
+               filesize += (items + 1) * sizeof(struct dqblk);
+               /*
+                * Calculate group quota file requirements.
+                *
+                */
+               items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
+               if (items < QF_MIN_GROUPS)
+                       items = QF_MIN_GROUPS;
+               else if (items > QF_MAX_GROUPS)
+                       items = QF_MAX_GROUPS;
+               if (!powerof2(items)) {
+                       int x = items;
+                       items = 4;
+                       while (x>>1 != 1) {
+                               x = x >> 1;
+                               items = items << 1;
+                       }
+               }
+               filesize += (items + 1) * sizeof(struct dqblk);
+       }
         zonesize += filesize;
  
         /*
@@ -2269,6 +2722,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
          */
         temp = zonesize;
         zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+       hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
+       /*
+        * If doing the round up for hfs_min_alloc_start would push us past
+        * allocLimit, then just reset it back to 0.  Though using a value 
+        * bigger than allocLimit would not cause damage in the block allocator
+        * code, this value could get stored in the volume header and make it out 
+        * to disk, making the volume header technically corrupt.
+        */
+       if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
+               hfsmp->hfs_min_alloc_start = 0;
+       }
+
+       if (really_do_it == 0) {
+               /* If metadata zone needs to be disabled because the 
+                * volume was truncated, clear the bit and zero out 
+                * the values that are no longer needed.
+                */
+               if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
+                       /* Disable metadata zone */
+                       hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
+                       
+                       /* Zero out mount point values that are not required */
+                       hfsmp->hfs_catalog_maxblks = 0;
+                       hfsmp->hfs_hotfile_maxblks = 0;
+                       hfsmp->hfs_hotfile_start = 0;
+                       hfsmp->hfs_hotfile_end = 0;
+                       hfsmp->hfs_hotfile_freeblks = 0;
+                       hfsmp->hfs_metazone_start = 0;
+                       hfsmp->hfs_metazone_end = 0;
+               }
+               
+               return;
+       }
+       
         temp = zonesize - temp;  /* temp has extra space */
         filesize += temp / 3;
         hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
@@ -2287,9 +2774,9 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
         hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
  #if 0
-       printf("HFS: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
-       printf("HFS: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
-       printf("HFS: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
+       printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+       printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+       printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
  #endif
         hfsmp->hfs_flags |= HFS_METADATA_ZONE;
  }
@@ -2322,7 +2809,6 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
   * Determine if a file is a "virtual" metadata file.
   * This includes journal and quota files.
   */
-__private_extern__
  int
  hfs_virtualmetafile(struct cnode *cp)
  {
@@ -2347,7 +2833,53 @@ hfs_virtualmetafile(struct cnode *cp)
  }
  
  
-__private_extern__
+//
+// Schedule the delayed sync thread call (hfsmp->hfs_syncer) unless one is already
+// scheduled; a no-op when the mount has no syncer.  Intended for ejectable media.
+//
+ __private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+       if (hfsmp->hfs_syncer)  {
+               clock_sec_t secs;
+               clock_usec_t usecs;
+               uint64_t now;
+
+               clock_get_calendar_microtime(&secs, &usecs);
+               now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;  // calendar time in microseconds
+
+               if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
+                       // A sync callback is still outstanding and the mount's pending
+                       // write size has reached hfs_max_pending_io: re-arming the timer
+                       // via thread_call_enter_delayed() would only defer that sync.
+                       return;
+               }
+
+               if (hfsmp->hfs_sync_scheduled == 0) {
+                       uint64_t deadline;
+
+                       hfsmp->hfs_last_sync_request_time = now;  // remember when this sync was requested
+
+                       clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);  // deadline = HFS_META_DELAY from now
+
+                       /*
+                        * Increment hfs_sync_scheduled on the assumption that we're the
+                        * first thread to schedule the timer.  If some other thread beat
+                        * us, then we'll decrement it.  If we *were* the first to
+                        * schedule the timer, then we need to keep track that the
+                        * callback is waiting to complete.
+                        */
+                       OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+                       if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))  // nonzero: a call was already pending
+                               OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);  // undo our optimistic increment
+                       else
+                               OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);  // we armed it; callback now outstanding
+               }               
+       }
+}
+
+
  int
  hfs_start_transaction(struct hfsmount *hfsmp)
  {
@@ -2372,10 +2904,11 @@ hfs_start_transaction(struct hfsmount *hfsmp)
         }
  #endif /* HFS_CHECK_LOCK_ORDER */
  
-    if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
-       lck_rw_lock_shared(&hfsmp->hfs_global_lock);
-       unlock_on_err = 1;
-    }
+       if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
+               hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+               OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+               unlock_on_err = 1;
+       }
  
         /* If a downgrade to read-only mount is in progress, no other
          * process than the downgrade process is allowed to modify 
@@ -2387,45 +2920,251 @@ hfs_start_transaction(struct hfsmount *hfsmp)
                 goto out;
         }
  
-    if (hfsmp->jnl) {
-       ret = journal_start_transaction(hfsmp->jnl);
-       if (ret == 0) {
-           OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_global_lock_nesting);
+       if (hfsmp->jnl) {
+               ret = journal_start_transaction(hfsmp->jnl);
+               if (ret == 0) {
+                       OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+               }
+       } else {
+               ret = 0;
         }
-    } else {
-       ret = 0;
-    }
  
  out:
-    if (ret != 0 && unlock_on_err) {
-       lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
-    }
+       if (ret != 0 && unlock_on_err) {
+               hfs_unlock_global (hfsmp);
+               OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+       }
  
      return ret;
  }
  
-__private_extern__
  int
  hfs_end_transaction(struct hfsmount *hfsmp)
  {
      int need_unlock=0, ret;
  
-    if (    hfsmp->jnl == NULL
-       || (   journal_owner(hfsmp->jnl) == current_thread()
-           && (OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_global_lock_nesting) == 1)) ) {
-
+    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
+           && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
             need_unlock = 1;
      } 
  
-    if (hfsmp->jnl) {
-       ret = journal_end_transaction(hfsmp->jnl);
-    } else {
-       ret = 0;
-    }
+       if (hfsmp->jnl) {
+               ret = journal_end_transaction(hfsmp->jnl);
+       } else {
+               ret = 0;
+       }
  
-    if (need_unlock) {
-       lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
-    }
+       if (need_unlock) {
+               OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+               hfs_unlock_global (hfsmp);
+               hfs_sync_ejectable(hfsmp);
+       }
  
      return ret;
  }
+
+
+/*
+ * Flush the contents of the journal to the disk.
+ *
+ *  Input:
+ *     hfsmp - mount whose journal (hfsmp->jnl) is flushed.
+ *     wait_for_IO -
+ *     If TRUE, wait to write in-memory journal to the disk
+ *     consistently, and also wait to write all asynchronous
+ *     metadata blocks to their corresponding locations
+ *     consistently on the disk.  This means that the journal
+ *     is empty at this point and does not contain any
+ *     transactions.  This is overkill in normal scenarios
+ *     but is useful whenever the metadata blocks are required
+ *     to be consistent on-disk instead of just the journal
+ *     being consistent; like before live verification
+ *     and live volume resizing.
+ *
+ *     If FALSE, only wait to write in-memory journal to the
+ *     disk consistently.  The journal may still contain
+ *     transactions whose metadata blocks are written to the
+ *     disk asynchronously, with no guarantee that they reach
+ *     the disk before returning.  This is sufficient for file
+ *     system data integrity, as the on-disk journal is consistent.
+ *
+ *  Returns: journal_flush() result, or 0 if there is no journal.
+ */
+int
+hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
+{
+       int ret;
+
+       /* Only peek at hfsmp->jnl while holding the global lock */
+       hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+       if (hfsmp->jnl) {
+               ret = journal_flush(hfsmp->jnl, wait_for_IO);  /* flush runs with the shared global lock held */
+       } else {
+               ret = 0;  /* non-journaled volume: nothing to flush, report success */
+       }
+       hfs_unlock_global (hfsmp);
+       
+       return ret;
+}
+
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check whether a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair?  We can't always be
+ * certain.  If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
+ * clump, we can assume that if a Catalog B-tree is larger than its clump size,
+ * that means that the entire first clump must have been written to, which means
+ * there shouldn't be unused and unwritten nodes in that first clump, and this
+ * repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired; a newer newfs_hfs and fsck_hfs repairs set it.
+ * Returns 0 if no repair was needed or the repair succeeded, else the error.
+ */
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+       int result; 
+       struct filefork *catalog;
+       int lockflags;
+       
+       if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+       {
+               /* This volume has already been checked and repaired. */
+               return 0;
+       }
+
+       if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
+       {
+               /* This volume is too old to have had the problem. */
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;  /* in-memory attribute only; not written to disk here */
+               return 0;
+       }
+
+       catalog = hfsmp->hfs_catalog_cp->c_datafork;
+       if (catalog->ff_size > catalog->ff_clumpsize)
+       {
+               /* The entire first clump must have been in use at some point. */
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+               return 0;
+       }
+       
+       /*
+        * If we get here, we need to zero out those unused nodes.
+        *
+        * We start a transaction and lock the catalog since we're going to be
+        * making on-disk changes.  But note that BTZeroUnusedNodes doesn't actually
+        * do its writing via the journal, because that would be too much I/O
+        * to fit in a transaction, and it's a pain to break it up into multiple
+        * transactions.  (It behaves more like growing a B-tree would.)
+        */
+       printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+       result = hfs_start_transaction(hfsmp);
+       if (result)
+               goto done;  /* nothing was locked or started, so just return the error */
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+       result = BTZeroUnusedNodes(catalog);
+       vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");  /* runs even if zeroing failed */
+       hfs_systemfile_unlock(hfsmp, lockflags);
+       hfs_end_transaction(hfsmp);  /* return value is ignored; result keeps the BTZeroUnusedNodes status */
+       if (result == 0)
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;  /* mark repaired only on success */
+       printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);  /* printed on failure too */
+
+done:
+       return result;
+}
+
+
+extern time_t snapshot_timestamp;  /* defined elsewhere; 0 disables the snapshot check below */
+/* Send tracked-file and snapshot events for vp to the namespace handler; returns EINTR if either wait was interrupted, else 0. */
+int
+check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
+{
+       int tracked_error = 0, snapshot_error = 0;
+       
+       if (vp == NULL) {
+               return 0;
+       }
+       
+       if (VTOC(vp)->c_flags & UF_TRACKED) {
+               // the file has the tracked bit set, so send an event to the tracked-file handler
+               int error;
+               
+               // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
+               error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
+               if (error) {
+                       if (error == EAGAIN) {
+                               printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
+                               // a timeout is logged but not reported to the caller
+                       } else if (error == EINTR) {
+                               // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
+                               tracked_error = EINTR;
+                       }
+               }  // any other error value is dropped
+       }
+
+       if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+               // snapshots are active and the file's change time is at or before the snapshot timestamp (or the vnode asks for snapshots)
+               int error;
+               
+               error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+               if (error == EDEADLK) {
+                       snapshot_error = 0;  // EDEADLK is deliberately treated as success
+               } else if (error) {
+                       if (error == EAGAIN) {
+                               printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
+                       } else if (error == EINTR) {
+                               // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
+                               snapshot_error = EINTR;
+                       }
+               }
+       }
+       
+       if (tracked_error) return tracked_error;  // the tracked-file result takes precedence
+       if (snapshot_error) return snapshot_error;
+       
+       return 0;
+}
+/* Raise a namespace-handler event for a dataless (UF_COMPRESSED with DATALESS_CMPFS_TYPE) file; returns 0, EINTR, EBADF, or the handler's error. */
+int
+check_for_dataless_file(struct vnode *vp, uint64_t op_type)
+{
+       int error;
+
+       if (vp == NULL || (VTOC(vp)->c_flags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+               // there's nothing to do, it's not dataless
+               return 0;
+       }
+                       
+       // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
+       error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
+       if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
+               error = 0;  // EDEADLK on a write op counts as success; the UF_COMPRESSED re-check below is skipped
+       } else if (error) {
+               if (error == EAGAIN) {
+                       printf("hfs: dataless: timed out waiting for namespace handler...\n");
+                       // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
+                       return 0;                               
+               } else if (error == EINTR) {
+                       // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
+                       return EINTR;
+               }  // any other error (including EDEADLK on a non-write op) falls through to the final return
+       } else if (VTOC(vp)->c_flags & UF_COMPRESSED) {
+               //
+               // if we're here, the dataless bit is still set on the file 
+               // which means it didn't get handled.  we return an error
+               // but it's presently ignored by all callers of this function.
+               //
+               // XXXdbg - EDATANOTPRESENT is what we really need...
+               //
+               return EBADF;
+       }                               
+
+       return error;
+}