xnu-1456.1.26.tar.gz

[apple/xnu.git] / bsd / hfs / hfs_vfsutils.c
diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c

index 19b5c03c5d239383bb884edf9a65ebb280d5fb92..2485c73f6a283b7a74e4747954346c3d8b1d274a 100644 (file)
--- a/bsd/hfs/hfs_vfsutils.c
+++ b/bsd/hfs/hfs_vfsutils.c
@@ -1,23 +1,29 @@
  /*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
   *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
   * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
   * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   */
  /*     @(#)hfs_vfsutils.c      4.0
  *
@@ -32,11 +38,18 @@
  #include <sys/malloc.h>
  #include <sys/stat.h>
  #include <sys/mount.h>
-#include <sys/namei.h>
-#include <sys/lock.h>
+#include <sys/mount_internal.h>
  #include <sys/buf.h>
+#include <sys/buf_internal.h>
  #include <sys/ubc.h>
  #include <sys/unistd.h>
+#include <sys/utfconv.h>
+#include <sys/kauth.h>
+#include <sys/fcntl.h>
+#include <sys/vnode_internal.h>
+#include <kern/clock.h>
+
+#include <libkern/OSAtomic.h>
  
  #include "hfs.h"
  #include "hfs_catalog.h"
@@ -44,15 +57,12 @@
  #include "hfs_mount.h"
  #include "hfs_endian.h"
  #include "hfs_cnode.h"
+#include "hfs_fsctl.h"
  
  #include "hfscommon/headers/FileMgrInternal.h"
  #include "hfscommon/headers/BTreesInternal.h"
  #include "hfscommon/headers/HFSUnicodeWrappers.h"
  
-
-extern int count_lock_queue __P((void));
-
-
  static void ReleaseMetaFileVNode(struct vnode *vp);
  static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
  
@@ -60,15 +70,6 @@ static void hfs_metadatazone_init(struct hfsmount *);
  static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
  
  
-
-u_int32_t GetLogicalBlockSize(struct vnode *vp);
-
-/* BTree accessor routines */
-extern OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block);
-extern OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount);
-extern OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF);
-extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options);
-
  //*******************************************************************************
  // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
  //       hence are not in the right byte order on little endian machines. It is
@@ -80,16 +81,16 @@ extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, Relea
  //
  //
  //*******************************************************************************
-char hfs_catname[] = "Catalog B-tree";
-char hfs_extname[] = "Extents B-tree";
-char hfs_vbmname[] = "Volume Bitmap";
+unsigned char hfs_catname[] = "Catalog B-tree";
+unsigned char hfs_extname[] = "Extents B-tree";
+unsigned char hfs_vbmname[] = "Volume Bitmap";
+unsigned char hfs_attrname[] = "Attribute B-tree";
+unsigned char hfs_startupname[] = "Startup File";
  
-char hfs_privdirname[] =
-       "\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80HFS+ Private Data";
  
  __private_extern__
  OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
-               struct proc *p)
+               __unused struct proc *p)
  {
         ExtendedVCB *vcb = HFSTOVCB(hfsmp);
         int error;
@@ -123,6 +124,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         vcb->vcbVBMSt           = SWAP_BE16 (mdb->drVBMSt);
         vcb->nextAllocation     = SWAP_BE16 (mdb->drAllocPtr);
         vcb->totalBlocks        = SWAP_BE16 (mdb->drNmAlBlks);
+       vcb->allocLimit         = vcb->totalBlocks;
         vcb->blockSize          = SWAP_BE32 (mdb->drAlBlkSiz);
         vcb->vcbClpSiz          = SWAP_BE32 (mdb->drClpSiz);
         vcb->vcbAlBlSt          = SWAP_BE16 (mdb->drAlBlSt);
@@ -146,16 +148,17 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         if (error || (utf8chars == 0))
                 (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
  
-       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
         vcb->vcbVBMIOSize = kHFSBlockSize;
  
-       VCB_LOCK_INIT(vcb);
+       hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+                                                 hfsmp->hfs_logical_block_count);
  
         bzero(&cndesc, sizeof(cndesc));
-       cndesc.cd_parentcnid = kRootParID;
+       cndesc.cd_parentcnid = kHFSRootParentID;
         cndesc.cd_flags |= CD_ISMETA;
         bzero(&cnattr, sizeof(cnattr));
-       cnattr.ca_nlink = 1;
+       cnattr.ca_linkcount = 1;
         cnattr.ca_mode = S_IFREG;
         bzero(&fork, sizeof(fork));
  
@@ -163,7 +166,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
          * Set up Extents B-tree vnode
          */
         cndesc.cd_nameptr = hfs_extname;
-       cndesc.cd_namelen = strlen(hfs_extname);
+       cndesc.cd_namelen = strlen((char *)hfs_extname);
         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
         fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
         fork.cf_blocks = fork.cf_size / vcb->blockSize;
@@ -177,21 +180,22 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
         cnattr.ca_blocks = fork.cf_blocks;
  
-       error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork,
-                               &vcb->extentsRefNum);
+       error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
+                               &hfsmp->hfs_extents_vp);
         if (error) goto MtVolErr;
-       error = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum),
+       error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                          (KeyCompareProcPtr)CompareExtentKeys));
         if (error) {
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+               hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
         }
+       hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
  
         /*
          * Set up Catalog B-tree vnode...
          */ 
         cndesc.cd_nameptr = hfs_catname;
-       cndesc.cd_namelen = strlen(hfs_catname);
+       cndesc.cd_namelen = strlen((char *)hfs_catname);
         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
         fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
         fork.cf_blocks = fork.cf_size / vcb->blockSize;
@@ -205,28 +209,46 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
         cnattr.ca_blocks = fork.cf_blocks;
  
-       error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork,
-                               &vcb->catalogRefNum);
+       error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
+                               &hfsmp->hfs_catalog_vp);
         if (error) {
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+               hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
         }
-       error = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum),
+       error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                          (KeyCompareProcPtr)CompareCatalogKeys));
         if (error) {
-               VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+               hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
+               hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
+               goto MtVolErr;
+       }
+       hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
+
+       /*
+        * Set up dummy Allocation file vnode (used only for locking bitmap)
+        */  
+       cndesc.cd_nameptr = hfs_vbmname;
+       cndesc.cd_namelen = strlen((char *)hfs_vbmname);
+       cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
+       bzero(&fork, sizeof(fork));
+       cnattr.ca_blocks = 0;
+
+       error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
+                                &hfsmp->hfs_allocation_vp);
+       if (error) {
+               hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
+               hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
                 goto MtVolErr;
         }
+       hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
  
         /* mark the volume dirty (clear clean unmount bit) */
         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
  
-       /*
-        * all done with b-trees so we can unlock now...
-        */
-       VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-       VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+    if (error == noErr)
+      {
+               error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
+      }
  
      if ( error == noErr )
        {
@@ -235,12 +257,20 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
              MarkVCBDirty( vcb );                                                               //      mark VCB dirty so it will be written
            }
        }
+
+       /*
+        * all done with system files so we can unlock now...
+        */
+       hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
+       hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
+       hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
+
      goto       CmdDone;
  
      //--       Release any resources allocated so far before exiting with an error:
  MtVolErr:
-       ReleaseMetaFileVNode(vcb->catalogRefNum);
-       ReleaseMetaFileVNode(vcb->extentsRefNum);
+       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
+       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
  
  CmdDone:
      return (error);
@@ -254,38 +284,40 @@ CmdDone:
  
  __private_extern__
  OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
-       off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args)
+       off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
  {
         register ExtendedVCB *vcb;
         struct cat_desc cndesc;
         struct cat_attr cnattr;
         struct cat_fork cfork;
-       UInt32 blockSize;
-       u_int64_t volumesize;
+       u_int32_t blockSize;
+       daddr64_t spare_sectors;
         struct BTreeInfoRec btinfo;
         u_int16_t  signature;
-       u_int16_t  version;
+       u_int16_t  hfs_version;
         int  i;
         OSErr retval;
  
         signature = SWAP_BE16(vhp->signature);
-       version = SWAP_BE16(vhp->version);
+       hfs_version = SWAP_BE16(vhp->version);
  
         if (signature == kHFSPlusSigWord) {
-               if (version != kHFSPlusVersion) {
-                       printf("hfs_mount: invalid HFS+ version: %d\n", version);
+               if (hfs_version != kHFSPlusVersion) {
+                       printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
                         return (EINVAL);
                 }
         } else if (signature == kHFSXSigWord) {
-               if (version != kHFSXVersion) {
-                       printf("hfs_mount: invalid HFSX version: %d\n", version);
+               if (hfs_version != kHFSXVersion) {
+                       printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
                         return (EINVAL);
                 }
                 /* The in-memory signature is always 'H+'. */
                 signature = kHFSPlusSigWord;
                 hfsmp->hfs_flags |= HFS_X;
         } else {
-               printf("hfs_mount: invalid HFS+ sig 0x%04x\n", signature);
+               /* Removed printf for invalid HFS+ signature because it gives
+                * a false error for a UFS root volume.
+                */
                 return (EINVAL);
         }
  
@@ -300,11 +332,25 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 return (EINVAL);
  
         /* Make sure we can live with the physical block size. */
-       if ((disksize & (hfsmp->hfs_phys_block_size - 1)) ||
-           (embeddedOffset & (hfsmp->hfs_phys_block_size - 1)) ||
-           (blockSize < hfsmp->hfs_phys_block_size)) {
+       if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
+           (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
+           (blockSize < hfsmp->hfs_logical_block_size)) {
                 return (ENXIO);
         }
+
+       /* If the allocation block size is less than the physical
+        * block size, we assume that the physical block size
+        * is the same as the logical block size.  The physical block
+        * size is used to round down the offsets for reading and
+        * writing the primary and alternate volume headers to a
+        * physical block boundary, which will cause problems if the
+        * allocation block size is smaller than the physical block size.
+        */
+       if (blockSize < hfsmp->hfs_physical_block_size) {
+               hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
+               hfsmp->hfs_log_per_phys = 1;
+       }
+
         /*
          * The VolumeHeader seems OK: transfer info from it into VCB
          * Note - the VCB starts out clear (all zeros)
@@ -314,7 +360,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         vcb->vcbSigWord = signature;
         vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
         vcb->vcbLsMod   = to_bsd_time(SWAP_BE32(vhp->modifyDate));
-       vcb->vcbAtrb    = (UInt16)SWAP_BE32(vhp->attributes);
+       vcb->vcbAtrb    = SWAP_BE32(vhp->attributes);
         vcb->vcbClpSiz  = SWAP_BE32(vhp->rsrcClumpSize);
         vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
         vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
@@ -329,11 +375,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
                 vcb->vcbWrCnt++;        /* compensate for write of Volume Header on last flush */
  
-       VCB_LOCK_INIT(vcb);
-
         /* Now fill in the Extended VCB info */
         vcb->nextAllocation     = SWAP_BE32(vhp->nextAllocation);
         vcb->totalBlocks        = SWAP_BE32(vhp->totalBlocks);
+       vcb->allocLimit         = vcb->totalBlocks;
         vcb->freeBlocks         = SWAP_BE32(vhp->freeBlocks);
         vcb->blockSize          = blockSize;
         vcb->encodingsBitmap    = SWAP_BE64(vhp->encodingsBitmap);
@@ -349,24 +394,40 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
          * (currently set up from the wrapper MDB) using the
          * new blocksize value:
          */
-       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
         vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
  
+       /*
+        * Validate and initialize the location of the alternate volume header.
+        */
+       spare_sectors = hfsmp->hfs_logical_block_count -
+                       (((daddr64_t)vcb->totalBlocks * blockSize) /
+                          hfsmp->hfs_logical_block_size);
+
+       if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
+               hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
+       } else {
+               hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+                                          HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+                                                         hfsmp->hfs_logical_block_count);
+       }
+
         bzero(&cndesc, sizeof(cndesc));
-       cndesc.cd_parentcnid = kRootParID;
+       cndesc.cd_parentcnid = kHFSRootParentID;
         cndesc.cd_flags |= CD_ISMETA;
         bzero(&cnattr, sizeof(cnattr));
-       cnattr.ca_nlink = 1;
+       cnattr.ca_linkcount = 1;
         cnattr.ca_mode = S_IFREG;
  
         /*
          * Set up Extents B-tree vnode
          */
         cndesc.cd_nameptr = hfs_extname;
-       cndesc.cd_namelen = strlen(hfs_extname);
+       cndesc.cd_namelen = strlen((char *)hfs_extname);
         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
  
         cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
+       cfork.cf_new_size= 0;
         cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
         cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
         cfork.cf_vblocks = 0;
@@ -377,22 +438,26 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 cfork.cf_extents[i].blockCount =
                                 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
         }
-       retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &vcb->extentsRefNum);
+       retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
+                                &hfsmp->hfs_extents_vp);
+       if (retval)
+       {
+               goto ErrorExit;
+       }
+       hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
+       hfs_unlock(hfsmp->hfs_extents_cp);
  
-       if (retval) goto ErrorExit;
-       retval = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum),
+       retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                           (KeyCompareProcPtr) CompareExtentKeysPlus));
-       if (retval) {
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+       if (retval)
+       {
                 goto ErrorExit;
         }
-
         /*
          * Set up Catalog B-tree vnode
          */ 
         cndesc.cd_nameptr = hfs_catname;
-       cndesc.cd_namelen = strlen(hfs_catname);
+       cndesc.cd_namelen = strlen((char *)hfs_catname);
         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
  
         cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
@@ -406,25 +471,25 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 cfork.cf_extents[i].blockCount =
                                 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
         }
-       retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &vcb->catalogRefNum);
+       retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
+                                &hfsmp->hfs_catalog_vp);
         if (retval) {
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
                 goto ErrorExit;
         }
-       retval = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum),
+       hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
+       hfs_unlock(hfsmp->hfs_catalog_cp);
+
+       retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                           (KeyCompareProcPtr) CompareExtendedCatalogKeys));
         if (retval) {
-               VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
                 goto ErrorExit;
         }
         if ((hfsmp->hfs_flags & HFS_X) &&
-           BTGetInformation(VTOF(vcb->catalogRefNum), 0, &btinfo) == 0) {
+           BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
                 if (btinfo.keyCompareType == kHFSBinaryCompare) {
                         hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
                         /* Install a case-sensitive key compare */
-                       (void) BTOpenPath(VTOF(vcb->catalogRefNum),
+                       (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                           (KeyCompareProcPtr)cat_binarykeycompare);
                 }
         }
@@ -433,7 +498,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
          * Set up Allocation file vnode
          */  
         cndesc.cd_nameptr = hfs_vbmname;
-       cndesc.cd_namelen = strlen(hfs_vbmname);
+       cndesc.cd_namelen = strlen((char *)hfs_vbmname);
         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
  
         cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
@@ -447,20 +512,78 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 cfork.cf_extents[i].blockCount =
                                 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
         }
-       retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork,
-                                &vcb->allocationsRefNum);
+       retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
+                                &hfsmp->hfs_allocation_vp);
         if (retval) {
-               VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
                 goto ErrorExit;
         }
+       hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
+       hfs_unlock(hfsmp->hfs_allocation_cp);
  
+       /*
+        * Set up Attribute B-tree vnode
+        */
+       if (vhp->attributesFile.totalBlocks != 0) {
+               cndesc.cd_nameptr = hfs_attrname;
+               cndesc.cd_namelen = strlen((char *)hfs_attrname);
+               cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
+       
+               cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
+               cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
+               cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
+               cfork.cf_vblocks = 0;
+               cnattr.ca_blocks = cfork.cf_blocks;
+               for (i = 0; i < kHFSPlusExtentDensity; i++) {
+                       cfork.cf_extents[i].startBlock =
+                                       SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
+                       cfork.cf_extents[i].blockCount =
+                                       SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
+               }
+               retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
+                                        &hfsmp->hfs_attribute_vp);
+               if (retval) {
+                       goto ErrorExit;
+               }
+               hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
+               hfs_unlock(hfsmp->hfs_attribute_cp);
+               retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
+                                                 (KeyCompareProcPtr) hfs_attrkeycompare));
+               if (retval) {
+                       goto ErrorExit;
+               }
+       }
+
+       /*
+        * Set up Startup file vnode
+        */
+       if (vhp->startupFile.totalBlocks != 0) {
+               cndesc.cd_nameptr = hfs_startupname;
+               cndesc.cd_namelen = strlen((char *)hfs_startupname);
+               cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
+       
+               cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
+               cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
+               cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
+               cfork.cf_vblocks = 0;
+               cnattr.ca_blocks = cfork.cf_blocks;
+               for (i = 0; i < kHFSPlusExtentDensity; i++) {
+                       cfork.cf_extents[i].startBlock =
+                                       SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
+                       cfork.cf_extents[i].blockCount =
+                                       SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
+               }
+               retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
+                                        &hfsmp->hfs_startup_vp);
+               if (retval) {
+                       goto ErrorExit;
+               }
+               hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
+               hfs_unlock(hfsmp->hfs_startup_cp);
+       }
+       
         /* Pick up volume name and create date */
-       retval = cat_idlookup(hfsmp, kHFSRootFolderID, &cndesc, &cnattr, NULL);
+       retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
         if (retval) {
-               VOP_UNLOCK(vcb->allocationsRefNum, 0, p);
-               VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-               VOP_UNLOCK(vcb->extentsRefNum, 0, p);
                 goto ErrorExit;
         }
         vcb->vcbCrDate = cnattr.ca_itime;
@@ -471,15 +594,13 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         /* mark the volume dirty (clear clean unmount bit) */
         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
         if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
-               hfs_flushvolumeheader(hfsmp, TRUE, TRUE);
+               hfs_flushvolumeheader(hfsmp, TRUE, 0);
         }
  
-       /*
-        * all done with metadata files so we can unlock now...
-        */
-       VOP_UNLOCK(vcb->allocationsRefNum, 0, p);
-       VOP_UNLOCK(vcb->catalogRefNum, 0, p);
-       VOP_UNLOCK(vcb->extentsRefNum, 0, p);
+       /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
+       if ((hfsmp->hfs_flags & HFS_X) != 0) {
+               hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
+       }
  
         //
         // Check if we need to do late journal initialization.  This only
@@ -493,14 +614,62 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
  
                 retval = hfs_late_journal_init(hfsmp, vhp, args);
                 if (retval != 0) {
+                       if (retval == EROFS) {
+                               // EROFS is a special error code that means the volume has an external
+                               // journal which we couldn't find.  In that case we do not want to
+                               // rewrite the volume header - we'll just refuse to mount the volume.
+                               retval = EINVAL;
+                               goto ErrorExit;
+                       }
+
                         hfsmp->jnl = NULL;
+                       
+                       // if the journal failed to open, then set the lastMountedVersion
+                       // to be "FSK!" which fsck_hfs will see and force the fsck instead
+                       // of just bailing out because the volume is journaled.
+                       if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
+                               HFSPlusVolumeHeader *jvhp;
+                               daddr64_t mdb_offset;
+                               struct buf *bp = NULL;
+                               
+                               hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
+                                   
+                               mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
+
+                               bp = NULL;
+                               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                                               HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+                                               hfsmp->hfs_physical_block_size, cred, &bp);
+                               if (retval == 0) {
+                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
+                                           
+                                       if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
+                                               printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
+                                               jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
+                                               buf_bwrite(bp);
+                                       } else {
+                                               buf_brelse(bp);
+                                       }
+                                       bp = NULL;
+                               } else if (bp) {
+                                       buf_brelse(bp);
+                                       // clear this so the error exit path won't try to use it
+                                       bp = NULL;
+                           }
+                       }
+
+                       retval = EINVAL;
                         goto ErrorExit;
                 } else if (hfsmp->jnl) {
-                       hfsmp->hfs_mp->mnt_flag |= MNT_JOURNALED;
+                       vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
                 }
-       } else if (hfsmp->jnl) {
+       } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
                 struct cat_attr jinfo_attr, jnl_attr;
                 
+               if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+                   vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
+               }
+
                 // if we're here we need to fill in the fileid's for the
                 // journal and journal_info_block.
                 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
@@ -509,6 +678,14 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                         printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
                         printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
                 }
+
+               if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+                   vcb->vcbAtrb |= kHFSVolumeJournaledMask;
+               }
+
+               if (hfsmp->jnl == NULL) {
+                   vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
+               }
         }
  
         /*
@@ -523,41 +700,61 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                 /* Keep the roving allocator out of the metadata zone. */
                 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
                     vcb->nextAllocation <= hfsmp->hfs_metazone_end) {       
-                       vcb->nextAllocation = hfsmp->hfs_metazone_end + 1;
+                       HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
+               }
+       } else {
+               if (vcb->nextAllocation <= 1) {
+                       vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
                 }
         }
+       vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
+
+       /* Setup private/hidden directories for hardlinks. */
+       hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
+       hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
  
-       /* setup private/hidden directory for unlinked files */
-       FindMetaDataDirectory(vcb);
-       if (hfsmp->jnl && ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0))
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) 
                 hfs_remove_orphans(hfsmp);
  
+       /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
+       {
+               retval = hfs_erase_unused_nodes(hfsmp);
+               if (retval)
+                       goto ErrorExit;
+       }
+       
         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
         {
                 MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
         }
  
-
         /*
          * Allow hot file clustering if conditions allow.
          */
         if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
             ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
-               (void) hfs_recording_init(hfsmp, p);
+               (void) hfs_recording_init(hfsmp);
         }
  
+       /* Force ACLs on HFS+ file systems. */
+       vfs_setextendedsecurity(HFSTOVFS(hfsmp));
+
+       /* Check if volume supports writing of extent-based extended attributes */
+       hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);
+
         return (0);
  
  ErrorExit:
         /*
-        * A fatal error occured and the volume cannot be mounted
+        * A fatal error occurred and the volume cannot be mounted
          * release any resources that we aquired...
          */
-
-       InvalidateCatalogCache(vcb);   
-       ReleaseMetaFileVNode(vcb->allocationsRefNum);
-       ReleaseMetaFileVNode(vcb->catalogRefNum);
-       ReleaseMetaFileVNode(vcb->extentsRefNum);
+       if (hfsmp->hfs_attribute_vp)
+               ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
+       ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
+       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
+       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
  
         return (retval);
  }
@@ -573,12 +770,15 @@ static void ReleaseMetaFileVNode(struct vnode *vp)
         struct filefork *fp;
  
         if (vp && (fp = VTOF(vp))) {
-               if (fp->fcbBTCBPtr != NULL)
+               if (fp->fcbBTCBPtr != NULL) {
+                       (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
                         (void) BTClosePath(fp);
+                       hfs_unlock(VTOC(vp));
+               }
  
                 /* release the node even if BTClosePath fails */
-               vrele(vp);
-               vgone(vp);
+               vnode_recycle(vp);
+               vnode_put(vp);
         }
  }
  
@@ -592,38 +792,64 @@ static void ReleaseMetaFileVNode(struct vnode *vp)
  
  __private_extern__
  int
-hfsUnmount( register struct hfsmount *hfsmp, struct proc *p)
+hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
  {
-       ExtendedVCB *vcb = HFSTOVCB(hfsmp);
-       int retval = E_NONE;
+       /* Get rid of our attribute data vnode (if any). */
+       if (hfsmp->hfs_attrdata_vp) {
+               vnode_t advp = hfsmp->hfs_attrdata_vp;
+       
+               if (vnode_get(advp) == 0) {     /* release only if vnode_get() succeeds */
+                       vnode_rele_ext(advp, O_EVTONLY, 0);
+                       vnode_put(advp);
+               }
+               hfsmp->hfs_attrdata_vp = NULLVP;        /* cleared whether or not vnode_get() succeeded */
+       }
  
-       InvalidateCatalogCache( vcb );
+       if (hfsmp->hfs_startup_vp)
+               ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
  
-       if (hfsmp->hfc_filevp) {
-               ReleaseMetaFileVNode(hfsmp->hfc_filevp);
-               hfsmp->hfc_filevp = NULL;
-       }
-               
-       if (vcb->vcbSigWord == kHFSPlusSigWord)
-               ReleaseMetaFileVNode(vcb->allocationsRefNum);
+       if (hfsmp->hfs_allocation_vp)
+               ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
  
-       ReleaseMetaFileVNode(vcb->catalogRefNum);
-       ReleaseMetaFileVNode(vcb->extentsRefNum);
+       if (hfsmp->hfs_attribute_vp)
+               ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
  
-       return (retval);
+       ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);    /* unguarded: ReleaseMetaFileVNode() ignores a NULL vp */
+       ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+
+       /*
+        * Setting these pointers to NULL so that any references
+        * past this point will fail, and tell us the point of failure.
+        * Also, facilitates a check in hfs_update for a null catalog
+        * vp.
+        */
+       hfsmp->hfs_allocation_vp = NULL;
+       hfsmp->hfs_attribute_vp = NULL;
+       hfsmp->hfs_catalog_vp = NULL;
+       hfsmp->hfs_extents_vp = NULL;
+       hfsmp->hfs_startup_vp = NULL;
+
+       return (0);     /* this version has no failure path */
  }
  
  
  /*
- * Test is fork has overflow extents.
+ * Test if fork has overflow extents.
   */
  __private_extern__
  int
  overflow_extents(struct filefork *fp)
  {
-       u_long blocks;
+       u_int32_t blocks;
  
-       if (VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
+       //
+       // If the vnode pointer is NULL then we're being called
+       // from hfs_remove_orphans() with a faked-up filefork
+       // and therefore it has to be an HFS+ volume.  Otherwise
+       // we check through the volume header to see what type
+       // of volume we're on.
+        //
+       if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
                 if (fp->ff_extents[7].blockCount == 0)
                         return (0);
  
@@ -649,55 +875,188 @@ overflow_extents(struct filefork *fp)
  
  
  /*
- * Lock/Unlock a metadata file.
+ * Lock HFS system file(s).
+ *
+ * flags is a mask of SFL_* bits naming the files to lock; locktype is
+ * applied to the Catalog, Attributes and Startup files (the Bitmap and
+ * Extents files are always locked exclusively).
+ *
+ * Returns the mask of locks that were actually taken, which may differ
+ * from the request; pass it to hfs_systemfile_unlock().
   */
  __private_extern__
  int
-hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct proc *p)
+hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
  {
-       ExtendedVCB             *vcb;
-       struct vnode    *vp = NULL;
-       int                             numOfLockedBuffs;
-       int     retval = 0;
+       /*
+        * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
+        */
+       if (flags & SFL_CATALOG) {
  
-       vcb = HFSTOVCB(hfsmp);
+#ifdef HFS_CHECK_LOCK_ORDER
+               if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
+               }
+               if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
+               }
+               if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
+               }
+#endif /* HFS_CHECK_LOCK_ORDER */
  
-       switch (fileID) {
-       case kHFSExtentsFileID:
-               vp = vcb->extentsRefNum;
-               break;
+               (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
+               /*
+                * When the catalog file has overflow extents then
+                * also acquire the extents b-tree lock if it's not
+                * already requested.
+                */
+               if ((flags & SFL_EXTENTS) == 0 &&
+                   overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
+                       flags |= SFL_EXTENTS;
+               }
+       }
+       if (flags & SFL_ATTRIBUTE) {
  
-       case kHFSCatalogFileID:
-               vp = vcb->catalogRefNum;
-               break;
+#ifdef HFS_CHECK_LOCK_ORDER
+               if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
+               }
+               if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
+               }
+#endif /* HFS_CHECK_LOCK_ORDER */
  
-       case kHFSAllocationFileID:
-               /* bitmap is covered by Extents B-tree locking */
-               /* FALL THROUGH */
-       default:
-               panic("hfs_lockmetafile: invalid fileID");
+               if (hfsmp->hfs_attribute_cp) {
+                       (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
+                       /*
+                        * When the attribute file has overflow extents then
+                        * also acquire the extents b-tree lock if it's not
+                        * already requested.
+                        */
+                       if ((flags & SFL_EXTENTS) == 0 &&
+                           overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
+                               flags |= SFL_EXTENTS;
+                       }
+               } else {
+                       flags &= ~SFL_ATTRIBUTE;
+               }
         }
+       /* No startup file on this volume: nothing to lock, and nothing to unlock later. */
+       if ((flags & SFL_STARTUP) && hfsmp->hfs_startup_cp == NULL) {
+               flags &= ~SFL_STARTUP;
+       }
+       if (flags & SFL_STARTUP) {
+#ifdef HFS_CHECK_LOCK_ORDER
+               if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
+                       panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
+               }
+#endif /* HFS_CHECK_LOCK_ORDER */
  
-       if ((flags & LK_TYPE_MASK) != LK_RELEASE) {
-               flags |= LK_RETRY;
-       } else if (hfsmp->jnl == NULL) {
-               struct timeval tv = time;
-               u_int32_t               lastfsync = tv.tv_sec; 
-               
-               (void) BTGetLastSync((FCB*)VTOF(vp), &lastfsync);
-               
-               numOfLockedBuffs = count_lock_queue();
-               if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
-                   ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > kMaxSecsForFsync))) {
-                       hfs_btsync(vp, HFS_SYNCTRANS);
+               (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
+               /*
+                * When the startup file has overflow extents then
+                * also acquire the extents b-tree lock if it's not
+                * already requested.
+                */
+               if ((flags & SFL_EXTENTS) == 0 &&
+                   overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
+                       flags |= SFL_EXTENTS;
                 }
         }
-       
-       retval = lockmgr(&VTOC(vp)->c_lock, flags, &vp->v_interlock, p);
+       /* 
+        * To prevent locks being taken in the wrong order, the extent lock
+        * gets a bitmap lock as well.
+        */
+       if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
+               /*
+                * Since the only bitmap operations are clearing and
+                * setting bits we always need exclusive access. And
+                * when we have a journal, we can "hide" behind that
+                * lock since we can only change the bitmap from
+                * within a transaction.
+                */
+               if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) {
+                       flags &= ~SFL_BITMAP;
+               } else {
+                       (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
+                       /* The bitmap lock is also grabbed when only extent lock 
+                        * was requested. Set the bitmap lock bit in the lock
+                        * flags which callers will use during unlock.
+                        */
+                       flags |= SFL_BITMAP;
+               }
+       }
+       if (flags & SFL_EXTENTS) {
+               /*
+                * Since the extents btree lock is recursive we always
+                * need exclusive access.
+                */
+               (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
+       }
+       return (flags);
+}
  
-       return (retval);
+/*
+ * Unlock the HFS system file(s) selected by the SFL_* bits in flags.
+ */
+__private_extern__
+void
+hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
+{
+       struct timeval tv;              /* set and read only when there is no journal */
+       u_int32_t lastfsync;            /* likewise; handed to BTGetLastSync() by address */
+       int numOfLockedBuffs;
+
+       if (hfsmp->jnl == NULL) {
+               microuptime(&tv);
+               lastfsync = tv.tv_sec;
+       }
+       if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
+               hfs_unlock(hfsmp->hfs_startup_cp);
+       }
+       if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
+               if (hfsmp->jnl == NULL) {       /* no journal: decide whether to hfs_btsync() before unlocking */
+                       BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
+                       numOfLockedBuffs = count_lock_queue();
+                       if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
+                           ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
+                             kMaxSecsForFsync))) {
+                               hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
+                       }
+               }
+               hfs_unlock(hfsmp->hfs_attribute_cp);
+       }
+       if (flags & SFL_CATALOG) {
+               if (hfsmp->jnl == NULL) {       /* same sync test as for the attributes b-tree */
+                       BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
+                       numOfLockedBuffs = count_lock_queue();
+                       if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
+                           ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
+                             kMaxSecsForFsync))) {
+                               hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
+                       }
+               }
+               hfs_unlock(hfsmp->hfs_catalog_cp);
+       }
+       if (flags & SFL_BITMAP) {       /* hfs_systemfile_lock() clears this bit when it takes no bitmap lock */
+               hfs_unlock(hfsmp->hfs_allocation_cp);
+       }
+       if (flags & SFL_EXTENTS) {
+               if (hfsmp->jnl == NULL) {       /* same sync test as for the attributes b-tree */
+                       BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
+                       numOfLockedBuffs = count_lock_queue();
+                       if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
+                           ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
+                             kMaxSecsForFsync))) {
+                               hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
+                       }
+               }
+               hfs_unlock(hfsmp->hfs_extents_cp);
+       }
  }
  
+
  /*
   * RequireFileLock
   *
@@ -707,37 +1055,36 @@ hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct p
  #if HFS_DIAGNOSTIC
  void RequireFileLock(FileReference vp, int shareable)
  {
-       struct lock__bsd__ *lkp;
-       int locked = false;
-       pid_t pid;
-       void * self;
+       int locked;
  
-       pid = current_proc()->p_pid;
-       self = (void *) current_act();
-       lkp = &VTOC(vp)->c_lock;
-
-       simple_lock(&lkp->lk_interlock);
+       /* The extents btree and allocation bitmap are always exclusive. */
+       if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
+           VTOC(vp)->c_fileid == kHFSAllocationFileID) {
+               shareable = 0;
+       }
         
-       if (shareable && (lkp->lk_sharecount > 0) && (lkp->lk_lockholder == LK_NOPROC))
-               locked = true;
-       else if ((lkp->lk_exclusivecount > 0) && (lkp->lk_lockholder == pid) && (lkp->lk_lockthread == self))
-               locked = true;
-
-       simple_unlock(&lkp->lk_interlock);
+       locked = VTOC(vp)->c_lockowner == (void *)current_thread();
         
-       if (!locked) {
+       /* Shareable callers are never flagged; only the exclusive case is verified. */
+       if (!locked && !shareable) {
                 switch (VTOC(vp)->c_fileid) {
-                       case 3:
-                               DEBUG_BREAK_MSG((" #\n # RequireFileLock: extent btree vnode not locked! v: 0x%08X\n #\n", (u_int)vp));
-                               break;
-
-                       case 4:
-                               DEBUG_BREAK_MSG((" #\n # RequireFileLock: catalog btree vnode not locked! v: 0x%08X\n #\n", (u_int)vp));
-                               break;
-
-                       default:
-                               DEBUG_BREAK_MSG((" #\n # RequireFileLock: file (%d) not locked! v: 0x%08X\n #\n", VTOC(vp)->c_fileid, (u_int)vp));
-                               break;
+               case kHFSExtentsFileID:
+                       panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       break;
+               case kHFSCatalogFileID:
+                       panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       break;
+               case kHFSAllocationFileID:
+                       /* The allocation file can hide behind the journal lock. */
+                       if (VTOHFS(vp)->jnl == NULL)
+                               panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       break;
+               case kHFSStartupFileID:
+                       panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       break;
+               case kHFSAttributesFileID:
+                       panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
+                       break;
                 }
         }
  }
@@ -757,15 +1102,15 @@ void RequireFileLock(FileReference vp, int shareable)
   *
   */
  int
-hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, struct ucred *cred,
-               struct proc *p, int invokesuperuserstatus)
+hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
+               __unused struct proc *p, int invokesuperuserstatus)
  {
-       if ((cred->cr_uid == cnode_uid) ||                                    /* [1a] */
+       if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
             (cnode_uid == UNKNOWNUID) ||                                                                          /* [1b] */
-           ((HFSTOVFS(hfsmp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
-             ((cred->cr_uid == hfsmp->hfs_uid) ||                            /* [2a] */
+           ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
+             ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
                 (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
-           (invokesuperuserstatus && (suser(cred, &p->p_acflag) == 0))) {    /* [3] */
+           (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
                 return (0);
         } else {        
                 return (EPERM);
@@ -773,15 +1118,15 @@ hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, struct ucred *cred,
  }
  
  
-unsigned long BestBlockSizeFit(unsigned long allocationBlockSize,
-                               unsigned long blockSizeLimit,
-                               unsigned long baseMultiple) {
+u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
+                               u_int32_t blockSizeLimit,
+                               u_int32_t baseMultiple) {
      /*
         Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
         specified limit but still an even multiple of the baseMultiple.
       */
      int baseBlockCount, blockCount;
-    unsigned long trialBlockSize;
+    u_int32_t trialBlockSize;
  
      if (allocationBlockSize % baseMultiple != 0) {
          /*
@@ -816,167 +1161,14 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize,
  }
  
  
-/*
- * To make the HFS Plus filesystem follow UFS unlink semantics, a remove
- * of an active vnode is translated to a move/rename so the file appears
- * deleted. The destination folder for these move/renames is setup here
- * and a reference to it is place in hfsmp->hfs_privdir_desc.
- */
  __private_extern__
-u_long
-FindMetaDataDirectory(ExtendedVCB *vcb)
-{
-       struct hfsmount * hfsmp;
-       struct vnode * dvp = NULL;
-       struct cnode * dcp = NULL;
-       struct FndrDirInfo * fndrinfo;
-       struct cat_desc out_desc = {0};
-       struct proc *p = current_proc();
-       struct timeval tv;
-       cat_cookie_t cookie;
-       int error;
-       
-       if (vcb->vcbSigWord != kHFSPlusSigWord)
-               return (0);
-
-       hfsmp = VCBTOHFS(vcb);
-
-       if (hfsmp->hfs_privdir_desc.cd_parentcnid == 0) {
-               hfsmp->hfs_privdir_desc.cd_parentcnid = kRootDirID;
-               hfsmp->hfs_privdir_desc.cd_nameptr = hfs_privdirname;
-               hfsmp->hfs_privdir_desc.cd_namelen = strlen(hfs_privdirname);
-               hfsmp->hfs_privdir_desc.cd_flags = CD_ISDIR;
-       }
-
-       /* Lock catalog b-tree */
-       if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p) != 0)
-               return (0);
-
-       error = cat_lookup(hfsmp, &hfsmp->hfs_privdir_desc, 0, NULL,
-                       &hfsmp->hfs_privdir_attr, NULL);
-
-       /* Unlock catalog b-tree */
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-
-       if (error == 0) {
-               hfsmp->hfs_metadata_createdate = hfsmp->hfs_privdir_attr.ca_itime;
-               hfsmp->hfs_privdir_desc.cd_cnid = hfsmp->hfs_privdir_attr.ca_fileid;
-               /*
-                * Clear the system immutable flag if set...
-                */
-               if ((hfsmp->hfs_privdir_attr.ca_flags & SF_IMMUTABLE) &&
-                   (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
-                       hfsmp->hfs_privdir_attr.ca_flags &= ~SF_IMMUTABLE;
-
-                       hfs_global_shared_lock_acquire(hfsmp);
-                       if (hfsmp->jnl) {
-                               if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
-                                       hfs_global_shared_lock_release(hfsmp);
-                                       return (hfsmp->hfs_privdir_attr.ca_fileid);
-                               }
-                       }
-                       if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p) == 0) {
-                               (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc,
-                                            &hfsmp->hfs_privdir_attr, NULL, NULL);
-                               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-                       }
-                       if (hfsmp->jnl) {
-                               journal_end_transaction(hfsmp->jnl);
-                       }
-                       hfs_global_shared_lock_release(hfsmp);
-               }
-               return (hfsmp->hfs_privdir_attr.ca_fileid);
-
-       } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
-
-               return (0);
-       }
-    
-       /* Setup the default attributes */
-       bzero(&hfsmp->hfs_privdir_attr, sizeof(struct cat_attr));
-       hfsmp->hfs_privdir_attr.ca_mode = S_IFDIR;
-       hfsmp->hfs_privdir_attr.ca_nlink = 2;
-       hfsmp->hfs_privdir_attr.ca_itime = vcb->vcbCrDate;
-       hfsmp->hfs_privdir_attr.ca_mtime = time.tv_sec;
-
-       /* hidden and off the desktop view */
-       fndrinfo = (struct FndrDirInfo *)&hfsmp->hfs_privdir_attr.ca_finderinfo;
-       fndrinfo->frLocation.v = SWAP_BE16 (22460);
-       fndrinfo->frLocation.h = SWAP_BE16 (22460);
-       fndrinfo->frFlags |= SWAP_BE16 (kIsInvisible + kNameLocked);            
-
-       // XXXdbg
-       hfs_global_shared_lock_acquire(hfsmp);
-       if (hfsmp->jnl) {
-           if ((error = journal_start_transaction(hfsmp->jnl)) != 0) {
-                       hfs_global_shared_lock_release(hfsmp);
-                       return (0);
-           }
-       }
-       /* Reserve some space in the Catalog file. */
-       if (cat_preflight(hfsmp, CAT_CREATE, &cookie, p) != 0) {
-               if (hfsmp->jnl) {
-                       journal_end_transaction(hfsmp->jnl);
-               }
-               hfs_global_shared_lock_release(hfsmp);
-               return (0);
-       }
-
-       if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p) == 0) {
-               error = cat_create(hfsmp, &hfsmp->hfs_privdir_desc,
-                               &hfsmp->hfs_privdir_attr, &out_desc);
-
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-       }
-
-       cat_postflight(hfsmp, &cookie, p);
-       
-       if (error) {
-           if (hfsmp->jnl) {
-                       journal_end_transaction(hfsmp->jnl);
-           }
-               hfs_global_shared_lock_release(hfsmp);
-
-           return (0);
-       }
-
-       hfsmp->hfs_privdir_desc.cd_hint = out_desc.cd_hint;
-       hfsmp->hfs_privdir_desc.cd_cnid = out_desc.cd_cnid;
-       hfsmp->hfs_privdir_attr.ca_fileid = out_desc.cd_cnid;
-       hfsmp->hfs_metadata_createdate = vcb->vcbCrDate;
-       
-       if (VFS_ROOT(HFSTOVFS(hfsmp), &dvp) == 0) {
-               dcp = VTOC(dvp);
-               dcp->c_childhint = out_desc.cd_hint;
-               dcp->c_nlink++;
-               dcp->c_entries++;
-               dcp->c_flag |= C_CHANGE | C_UPDATE;
-               tv = time;
-               (void) VOP_UPDATE(dvp, &tv, &tv, 0);
-               vput(dvp);
-       }
-       hfs_volupdate(hfsmp, VOL_MKDIR, 1);
-       if (hfsmp->jnl) {
-           journal_end_transaction(hfsmp->jnl);
-       } 
-       hfs_global_shared_lock_release(hfsmp);
-
-       cat_releasedesc(&out_desc);
-
-       return (out_desc.cd_cnid);
-}
-
-__private_extern__
-u_long
-GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name,
+u_int32_t
+GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
                         struct cat_attr *fattr, struct cat_fork *forkinfo)
  {
         struct hfsmount * hfsmp;
-       struct vnode * dvp = NULL;
-       struct cnode * dcp = NULL;
-       struct FndrDirInfo * fndrinfo;
         struct cat_desc jdesc;
-       struct timeval tv;
+       int lockflags;
         int error;
         
         if (vcb->vcbSigWord != kHFSPlusSigWord)
@@ -986,30 +1178,27 @@ GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name,
  
         memset(&jdesc, 0, sizeof(struct cat_desc));
         jdesc.cd_parentcnid = kRootDirID;
-       jdesc.cd_nameptr = name;
+       jdesc.cd_nameptr = (const u_int8_t *)name;
         jdesc.cd_namelen = strlen(name);
  
-       /* Lock catalog b-tree */
-       error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, current_proc());    
-       if (error)
-               return (0);
-
-       error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo);
-
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, current_proc());
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+       error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
         if (error == 0) {
                 return (fattr->ca_fileid);
         } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
                 return (0);
         }
+
+       return (0);     /* XXX what callers expect on an error */
  }
  
  
  /*
- * On Journaled HFS, there can be orphaned files.  These
- * are files that were unlinked while busy. If the volume
- * was not cleanly unmounted then some of these files may
+ * On HFS Plus Volumes, there can be orphaned files or directories.
+ * These are files or directories that were unlinked while busy. 
+ * If the volume was not cleanly unmounted then some of these may
   * have persisted and need to be removed.
   */
  __private_extern__
@@ -1026,18 +1215,22 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
         char filename[32];
         char tempname[32];
         size_t namelen;
-       cat_cookie_t cookie = {0};
+       cat_cookie_t cookie;
         int catlock = 0;
         int catreserve = 0;
         int started_tr = 0;
-       int shared_lock = 0;
+       int lockflags;
         int result;
-       
+       int orphaned_files = 0;
+       int orphaned_dirs = 0;
+
+       bzero(&cookie, sizeof(cookie));
+
         if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
                 return;
  
         vcb = HFSTOVCB(hfsmp);
-       fcb = VTOF(vcb->catalogRefNum);
+       fcb = VTOF(hfsmp->hfs_catalog_vp);
  
         btdata.bufferAddress = &filerec;
         btdata.itemSize = sizeof(filerec);
@@ -1045,84 +1238,80 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
  
         MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
         bzero(iterator, sizeof(*iterator));
+       
+       /* Build a key to "temp" */
         keyp = (HFSPlusCatalogKey*)&iterator->key;
-       keyp->parentID = hfsmp->hfs_privdir_desc.cd_cnid;
+       keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+       keyp->nodeName.length = 4;  /* "temp" */
+       keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
+       keyp->nodeName.unicode[0] = 't';
+       keyp->nodeName.unicode[1] = 'e';
+       keyp->nodeName.unicode[2] = 'm';
+       keyp->nodeName.unicode[3] = 'p';
  
-       result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);        
-       if (result)
-               goto exit;
         /*
-        * Position the iterator at the folder thread record.
-        * (i.e. one record before first child)
+        * Position the iterator just before the first real temp file/dir.
          */
-       result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
-
-       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
-       if (result)
-               goto exit;
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+       (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
+       hfs_systemfile_unlock(hfsmp, lockflags);
  
-       /* Visit all the children in the HFS+ private directory. */
+       /* Visit all the temp files/dirs in the HFS+ private directory. */
         for (;;) {
-               result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p);        
-               if (result)
-                       goto exit;
-
+               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
                 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
-
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+               hfs_systemfile_unlock(hfsmp, lockflags);
                 if (result)
                         break;
-
-               if (keyp->parentID != hfsmp->hfs_privdir_desc.cd_cnid)
+               if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
                         break;
-               if (filerec.recordType != kHFSPlusFileRecord)
-                       continue;
                 
                 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
-                                     filename, &namelen, sizeof(filename), 0, 0);
+                                     (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
                 
-               (void) sprintf(tempname, "%s%d", HFS_DELETE_PREFIX, filerec.fileID);
+               (void) snprintf(tempname, sizeof(tempname), "%s%d",
+                               HFS_DELETE_PREFIX, filerec.fileID);
                 
                 /*
-                * Delete all files named "tempxxx", where
-                * xxx is the file's cnid in decimal.
+                * Delete all files (and directories) named "tempxxx", 
+                * where xxx is the file's cnid in decimal.
                  *
                  */
                 if (bcmp(tempname, filename, namelen) == 0) {
-                       struct filefork dfork = {0};
-                       struct filefork rfork = {0};
-                       struct cnode cnode = {0};
-
-                       // XXXdbg
-                       hfs_global_shared_lock_acquire(hfsmp);
-                       shared_lock = 1;
-                       if (hfsmp->jnl) {
-                               if (journal_start_transaction(hfsmp->jnl) != 0) {
-                                       goto exit;
-                               }
-                               started_tr = 1;
+                       struct filefork dfork;
+                       struct filefork rfork;
+                       struct cnode cnode;
+
+                       bzero(&dfork, sizeof(dfork));
+                       bzero(&rfork, sizeof(rfork));
+                       bzero(&cnode, sizeof(cnode));
+                       
+                       /* Delete any attributes, ignore errors */
+                       (void) hfs_removeallattr(hfsmp, filerec.fileID);
+                       
+                       if (hfs_start_transaction(hfsmp) != 0) {
+                           printf("hfs_remove_orphans: failed to start transaction\n");
+                           goto exit;
                         }
+                       started_tr = 1;
                 
                         /*
                          * Reserve some space in the Catalog file.
                          */
                         if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
+                           printf("hfs_remove_orphans: cat_preflight failed\n");
                                 goto exit;
                         }
                         catreserve = 1;
  
-                       /* Lock catalog b-tree */
-                       if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID,
-                                               LK_EXCLUSIVE, p) != 0) {
-                               goto exit;
-                       }
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
                         catlock = 1;
  
                         /* Build a fake cnode */
                         cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
                                         &dfork.ff_data, &rfork.ff_data);
-                       cnode.c_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid;
-                       cnode.c_desc.cd_nameptr = filename;
+                       cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+                       cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
                         cnode.c_desc.cd_namelen = namelen;
                         cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
                         cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
@@ -1142,14 +1331,14 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                 cnode.c_rsrcfork = NULL;
                                 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
                                 while (fsize > 0) {
-                                       if (fsize > HFS_BIGFILE_SIZE) {
+                                   if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
                                                 fsize -= HFS_BIGFILE_SIZE;
                                         } else {
                                                 fsize = 0;
                                         }
  
                                         if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
-                                               printf("error truncting data fork!\n");
+                                               printf("hfs: error truncting data fork!\n");
                                                 break;
                                         }
  
@@ -1159,11 +1348,19 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                         // that no one transaction gets too big.
                                         //
                                         if (fsize > 0 && started_tr) {
-                                               journal_end_transaction(hfsmp->jnl);
-                                               if (journal_start_transaction(hfsmp->jnl) != 0) {
+                                               /* Drop system file locks before starting 
+                                                * another transaction to preserve lock order.
+                                                */
+                                               hfs_systemfile_unlock(hfsmp, lockflags);
+                                               catlock = 0;
+                                               hfs_end_transaction(hfsmp);
+
+                                               if (hfs_start_transaction(hfsmp) != 0) {
                                                         started_tr = 0;
                                                         break;
                                                 }
+                                               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+                                               catlock = 1;
                                         }
                                 }
                         }
@@ -1173,49 +1370,70 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
                                 cnode.c_datafork = NULL;
                                 cnode.c_rsrcfork = &rfork;
                                 if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
-                                       printf("error truncting rsrc fork!\n");
+                                       printf("hfs: error truncting rsrc fork!\n");
                                         break;
                                 }
                         }
  
-                       /* Remove the file record from the Catalog */   
+                       /* Remove the file or folder record from the Catalog */ 
                         if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
-                               printf("error deleting cat rec!\n");
+                               printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               catlock = 0;
+                               hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                                 break;
                         }
                         
+                       if (cnode.c_attr.ca_mode & S_IFDIR) {
+                               orphaned_dirs++;
+                       }
+                       else {
+                               orphaned_files++;
+                       }
+
                         /* Update parent and volume counts */   
-                       hfsmp->hfs_privdir_attr.ca_entries--;
-                       (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc,
-                                        &hfsmp->hfs_privdir_attr, NULL, NULL);
-                       hfs_volupdate(hfsmp, VOL_RMFILE, 0);
+                       hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
+                       if (cnode.c_attr.ca_mode & S_IFDIR) {
+                               DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+                       }
+
+                       (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+                                        &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
  
                         /* Drop locks and end the transaction */
-                       (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+                       hfs_systemfile_unlock(hfsmp, lockflags);
                         cat_postflight(hfsmp, &cookie, p);
                         catlock = catreserve = 0;
+
+                       /* 
+                          Now that Catalog is unlocked, update the volume info, making
+                          sure to differentiate between files and directories
+                       */
+                       if (cnode.c_attr.ca_mode & S_IFDIR) {
+                               hfs_volupdate(hfsmp, VOL_RMDIR, 0);
+                       }
+                       else{
+                               hfs_volupdate(hfsmp, VOL_RMFILE, 0);
+                       }
+
                         if (started_tr) {
-                               journal_end_transaction(hfsmp->jnl);
+                               hfs_end_transaction(hfsmp);
                                 started_tr = 0;
                         }
-                       hfs_global_shared_lock_release(hfsmp);
-                       shared_lock = 0;
  
                 } /* end if */
         } /* end for */
-       
+       if (orphaned_files > 0 || orphaned_dirs > 0)
+               printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
  exit:
         if (catlock) {
-               (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p);
+               hfs_systemfile_unlock(hfsmp, lockflags);
         }
         if (catreserve) {
                 cat_postflight(hfsmp, &cookie, p);
         }
         if (started_tr) {
-               journal_end_transaction(hfsmp->jnl);
-       }
-       if (shared_lock) {
-               hfs_global_shared_lock_release(hfsmp);
+               hfs_end_transaction(hfsmp);
         }
  
         FREE(iterator, M_TEMP);
@@ -1238,7 +1456,7 @@ u_int32_t logBlockSize;
         /* start with default */
         logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
  
-       if (vp->v_flag & VSYSTEM) {
+       if (vnode_issystem(vp)) {
                 if (VTOF(vp)->fcbBTCBPtr != NULL) {
                         BTreeInfoRec                    bTreeInfo;
         
@@ -1268,18 +1486,27 @@ __private_extern__
  u_int32_t
  hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
  {
-       struct vcb_t *vcb = HFSTOVCB(hfsmp);
         u_int32_t freeblks;
+       u_int32_t rsrvblks;
+       u_int32_t loanblks;
  
-       freeblks = vcb->freeBlocks;
+       /*
+        * We don't bother taking the mount lock
+        * to look at these values since the values
+        * themselves are each updated atomically
+        * on aligned addresses.
+        */
+       freeblks = hfsmp->freeBlocks;
+       rsrvblks = hfsmp->reserveBlocks;
+       loanblks = hfsmp->loanedBlocks;
         if (wantreserve) {
-               if (freeblks > vcb->reserveBlocks)
-                       freeblks -= vcb->reserveBlocks;
+               if (freeblks > rsrvblks)
+                       freeblks -= rsrvblks;
                 else
                         freeblks = 0;
         }
-       if (freeblks > vcb->loanedBlocks)
-               freeblks -= vcb->loanedBlocks;
+       if (freeblks > loanblks)
+               freeblks -= loanblks;
         else
                 freeblks = 0;
  
@@ -1289,32 +1516,44 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
          * available space on the backing store volume.
          */
         if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
-               struct statfs statbuf;  /* 272 bytes */
-               u_int32_t vfreeblks;
+               struct vfsstatfs *vfsp;  /* 272 bytes */
+               u_int64_t vfreeblks;
                 u_int32_t loanedblks;
                 struct mount * backingfs_mp;
+               struct timeval now;
  
-               backingfs_mp = hfsmp->hfs_backingfs_rootvp->v_mount;
+               backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
  
-               if (VFS_STATFS(backingfs_mp, &statbuf, current_proc()) == 0) {
-                       vfreeblks = statbuf.f_bavail;
+               microtime(&now);
+               if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
+                   vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
+                   hfsmp->hfs_last_backingstatfs = now.tv_sec;
+               }
+
+               if ((vfsp = vfs_statfs(backingfs_mp))) {
+                       HFS_MOUNT_LOCK(hfsmp, TRUE);
+                       vfreeblks = vfsp->f_bavail;
                         /* Normalize block count if needed. */
-                       if (statbuf.f_bsize != vcb->blockSize) {
-                               vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)statbuf.f_bsize) / vcb->blockSize;
+                       if (vfsp->f_bsize != hfsmp->blockSize) {
+                               vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
                         }
-                       if (vfreeblks > hfsmp->hfs_sparsebandblks)
+                       if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
                                 vfreeblks -= hfsmp->hfs_sparsebandblks;
                         else
                                 vfreeblks = 0;
                         
                         /* Take into account any delayed allocations. */
-                       loanedblks = 2 * vcb->loanedBlocks;
+                       loanedblks = 2 * hfsmp->loanedBlocks;
                         if (vfreeblks > loanedblks)
                                 vfreeblks -= loanedblks;
                         else
                                 vfreeblks = 0;
  
+                       if (hfsmp->hfs_backingfs_maxblocks) {
+                               vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
+                       }
                         freeblks = MIN(vfreeblks, freeblks);
+                       HFS_MOUNT_UNLOCK(hfsmp, TRUE);
                 }
         }
  #endif /* HFS_SPARSE_DEV */
@@ -1339,7 +1578,7 @@ short MacToVFSError(OSErr err)
                 return EOVERFLOW;
         
         case btBadNode:                 /* -32731 */
-               return EBADF;
+               return EIO;
         
         case memFullErr:                /*  -108 */
                 return ENOMEM;          /*   +12 */
@@ -1378,96 +1617,161 @@ short MacToVFSError(OSErr err)
  
  
  /*
- * Get the directory entry name hint for a given index.
- * The directory cnode (dcp) must be locked.
+ * Find, create, or recycle the directory hint for a given index.
+ *
+ * Requires an exclusive lock on directory cnode.
+ *
+ * Use detach if the cnode lock must be dropped while the hint is still active.
   */
  __private_extern__
-char *
-hfs_getnamehint(struct cnode *dcp, int index)
+directoryhint_t *
+hfs_getdirhint(struct cnode *dcp, int index, int detach)
  {
-       struct hfs_index *entry;
-       void *self;
-
-       if (index > 0) {
-               self = current_act();
-               SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) {
-                       if ((entry->hi_index == index)
-                       &&  (entry->hi_thread == self))
-                               return (entry->hi_name);
+       struct timeval tv;
+       directoryhint_t *hint;
+       boolean_t need_remove, need_init;
+       const u_int8_t * name;
+
+       microuptime(&tv);
+
+       /*
+        *  Look for an existing hint first.  If not found, create a new one (when
+        *  the list is not full) or recycle the oldest hint.  Since new hints are
+        *  always added to the head of the list, the last hint is always the
+        *  oldest.
+        */
+       TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
+               if (hint->dh_index == index)
+                       break;
+       }
+       if (hint != NULL) { /* found an existing hint */
+               need_init = false;
+               need_remove = true;
+       } else { /* cannot find an existing hint */
+               need_init = true;
+               if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
+                       /* Create a default directory hint */
+                       MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
+                       ++dcp->c_dirhintcnt;
+                       need_remove = false;
+               } else {                                /* recycle the last (i.e., the oldest) hint */
+                       hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
+                       if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
+                           (name = hint->dh_desc.cd_nameptr)) {
+                               hint->dh_desc.cd_nameptr = NULL;
+                               hint->dh_desc.cd_namelen = 0;
+                               hint->dh_desc.cd_flags &= ~CD_HASBUF;                           
+                               vfs_removename((const char *)name);
+                       }
+                       need_remove = true;
                 }
         }
  
-       return (NULL);
+       if (need_remove)
+               TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
+
+       if (detach)
+               --dcp->c_dirhintcnt;
+       else
+               TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
+
+       if (need_init) {
+               hint->dh_index = index;
+               hint->dh_desc.cd_flags = 0;
+               hint->dh_desc.cd_encoding = 0;
+               hint->dh_desc.cd_namelen = 0;
+               hint->dh_desc.cd_nameptr = NULL;
+               hint->dh_desc.cd_parentcnid = dcp->c_fileid;
+               hint->dh_desc.cd_hint = dcp->c_childhint;
+               hint->dh_desc.cd_cnid = 0;
+       }
+       hint->dh_time = tv.tv_sec;
+       return (hint);
  }
  
  /*
- * Save a directory entry name hint for a given index.
- * The directory cnode (dcp) must be locked.
+ * Release a single directory hint.
+ *
+ * Requires an exclusive lock on directory cnode.
   */
  __private_extern__
  void
-hfs_savenamehint(struct cnode *dcp, int index, const char * namehint)
+hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
  {
-       struct hfs_index *entry;
-       int len;
-
-       if (index > 0) {
-               len = strlen(namehint);
-               MALLOC(entry, struct hfs_index *, len + sizeof(struct hfs_index),
-                       M_TEMP, M_WAITOK);
-               entry->hi_index = index;
-               entry->hi_thread = current_act();
-               bcopy(namehint, entry->hi_name, len + 1);
-               SLIST_INSERT_HEAD(&dcp->c_indexlist, entry, hi_link);
+       const u_int8_t * name;
+       directoryhint_t *hint;
+
+       /* Check if item is on list (could be detached) */
+       TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
+               if (hint == relhint) {
+                       TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
+                       --dcp->c_dirhintcnt;
+                       break;
+               }
+       }
+       name = relhint->dh_desc.cd_nameptr;
+       if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
+               relhint->dh_desc.cd_nameptr = NULL;
+               relhint->dh_desc.cd_namelen = 0;
+               relhint->dh_desc.cd_flags &= ~CD_HASBUF;
+               vfs_removename((const char *)name);
         }
+       FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
  }
  
  /*
- * Release the directory entry name hint for a given index.
- * The directory cnode (dcp) must be locked.
+ * Release directory hints for given directory
+ *
+ * Requires an exclusive lock on directory cnode.
   */
  __private_extern__
  void
-hfs_relnamehint(struct cnode *dcp, int index)
+hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
  {
-       struct hfs_index *entry;
-       void *self;
-
-       if (index > 0) {
-               self = current_act();
-               SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) {
-                       if ((entry->hi_index == index)
-                       &&  (entry->hi_thread == self)) {
-                               SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index,
-                                       hi_link);
-                               FREE(entry, M_TEMP);
-                               break;
-                       }
+       struct timeval tv;
+       directoryhint_t *hint, *prev;
+       const u_int8_t * name;
+
+       if (stale_hints_only)
+               microuptime(&tv);
+
+       /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
+       for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
+               if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
+                       break;  /* stop here if this entry is too new */
+               name = hint->dh_desc.cd_nameptr;
+               if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
+                       hint->dh_desc.cd_nameptr = NULL;
+                       hint->dh_desc.cd_namelen = 0;
+                       hint->dh_desc.cd_flags &= ~CD_HASBUF;
+                       vfs_removename((const char *)name);
                 }
+               prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
+               TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
+               FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
+               --dcp->c_dirhintcnt;
         }
  }
  
  /*
- * Release all directory entry name hints.
+ * Insert a detached directory hint back into the list of dirhints.
+ *
+ * Requires an exclusive lock on directory cnode.
   */
  __private_extern__
  void
-hfs_relnamehints(struct cnode *dcp)
+hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
  {
-       struct hfs_index *entry;
-       struct hfs_index *next;
-
-       if (!SLIST_EMPTY(&dcp->c_indexlist)) {
-               for(entry = SLIST_FIRST(&dcp->c_indexlist);
-                   entry != NULL;
-                   entry = next) {
-                       next = SLIST_NEXT(entry, hi_link);
-                       SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index, hi_link);
-                       FREE(entry, M_TEMP);
-               }
+       directoryhint_t *test;
+
+       TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
+               if (test == hint)
+                       panic("hfs_insertdirhint: hint %p already on list!", hint);
         }
-}
  
+       TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
+       ++dcp->c_dirhintcnt;
+}
  
  /*
   * Perform a case-insensitive compare of two UTF-8 filenames.
@@ -1476,7 +1780,7 @@ hfs_relnamehints(struct cnode *dcp)
   */
  __private_extern__
  int
-hfs_namecmp(const char *str1, size_t len1, const char *str2, size_t len2)
+hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
  {
         u_int16_t *ustr1, *ustr2;
         size_t ulen1, ulen2;
@@ -1502,20 +1806,156 @@ out:
  }
  
  
+typedef struct jopen_cb_info {
+       off_t   jsize;
+       char   *desired_uuid;
+        struct  vnode *jvp;
+       size_t  blksize;
+       int     need_clean;
+       int     need_init;
+} jopen_cb_info;
+
+static int
+journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
+{
+       struct nameidata nd;
+       jopen_cb_info *ji = (jopen_cb_info *)arg;
+       char bsd_name[256];
+       int error;
+       
+       strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
+       strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
+
+       if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
+               return 1;   // keep iterating
+       }
+
+       // if we're here, either the desired uuid matched or there was no
+       // desired uuid so let's try to open the device for writing and
+       // see if it works.  if it does, we'll use it.
+       
+       NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
+       if ((error = namei(&nd))) {
+               printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
+               return 1;   // keep iterating
+       }
+
+       ji->jvp = nd.ni_vp;
+       nameidone(&nd);
+
+       if (ji->jvp == NULL) {
+               printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
+       } else {
+               error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
+               if (error == 0) {
+                       // if the journal is dirty and we didn't specify a desired
+                       // journal device uuid, then do not use the journal.  but
+                       // if the journal is just invalid (e.g. it hasn't been
+                       // initialized) then just set the need_init flag.
+                       if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
+                               error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
+                               if (error == EBUSY) {
+                                       VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
+                                       vnode_put(ji->jvp);
+                                       ji->jvp = NULL;
+                                       return 1;    // keep iterating
+                               } else if (error == EINVAL) {
+                                       ji->need_init = 1;
+                               }
+                       }
+
+                       if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
+                               strlcpy(ji->desired_uuid, uuid_str, 128);
+                       }
+                       vnode_setmountedon(ji->jvp);
+                       // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
+                       return 0;   // stop iterating
+               } else {
+                       vnode_put(ji->jvp);
+                       ji->jvp = NULL;
+               }
+       }
+
+       return 1;   // keep iterating
+}
+
+extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
+extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
+extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
+kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
+
+
+static vnode_t
+open_journal_dev(const char *vol_device,
+                int need_clean,
+                char *uuid_str,
+                char *machine_serial_num,
+                off_t jsize,
+                size_t blksize,
+                int *need_init)
+{
+    int retry_counter=0;
+    jopen_cb_info ji;
+
+    ji.jsize        = jsize;
+    ji.desired_uuid = uuid_str;
+    ji.jvp          = NULL;
+    ji.blksize      = blksize;
+    ji.need_clean   = need_clean;
+    ji.need_init    = 0;
+
+//    if (uuid_str[0] == '\0') {
+//         printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
+//    } else {
+//         printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
+//    }
+    while (ji.jvp == NULL && retry_counter++ < 4) {
+           if (retry_counter > 1) {
+                   if (uuid_str[0]) {
+                           printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
+                   } else {
+                           printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
+                   }
+                   delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
+           }
+
+           IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
+    }
+
+    if (ji.jvp == NULL) {
+           printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
+                  vol_device, uuid_str, machine_serial_num);
+    }
+
+    *need_init = ji.need_init;
+
+    return ji.jvp;
+}
+
+
  __private_extern__
  int
  hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
-                                          void *_args, int embeddedOffset, int mdb_offset,
-                                          HFSMasterDirectoryBlock *mdbp, struct ucred *cred)
+                                          void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
+                                          HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
  {
         JournalInfoBlock *jibp;
         struct buf       *jinfo_bp, *bp;
         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
-       int               retval, blksize = hfsmp->hfs_phys_block_size;
+       int               retval, write_jibp = 0;
+       uint32_t                  blksize = hfsmp->hfs_logical_block_size;
         struct vnode     *devvp;
         struct hfs_mount_args *args = _args;
-
+       u_int32_t         jib_flags;
+       u_int64_t         jib_offset;
+       u_int64_t         jib_size;
+       const char *dev_name;
+       
         devvp = hfsmp->hfs_devvp;
+       dev_name = vnode_name(devvp);
+       if (dev_name == NULL) {
+               dev_name = "unknown-dev";
+       }
  
         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
                 arg_flags  = args->journal_flags;
@@ -1524,36 +1964,93 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
  
         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
                                 
-       retval = meta_bread(devvp,
-                                               embeddedOffset/blksize + 
-                                               (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock),
-                                               SWAP_BE32(vhp->blockSize), cred, &jinfo_bp);
-       if (retval)
+       jinfo_bp = NULL;
+       retval = (int)buf_meta_bread(devvp,
+                                               (daddr64_t)((embeddedOffset/blksize) + 
+                                               ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
+                                               hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
+       if (retval) {
+               if (jinfo_bp) {
+                       buf_brelse(jinfo_bp);
+               }
                 return retval;
+       }
+       
+       jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
+       jib_flags  = SWAP_BE32(jibp->flags);
+       jib_size   = SWAP_BE64(jibp->size);
  
-       jibp = (JournalInfoBlock *)jinfo_bp->b_data;
-       jibp->flags  = SWAP_BE32(jibp->flags);
-       jibp->offset = SWAP_BE64(jibp->offset);
-       jibp->size   = SWAP_BE64(jibp->size);
-
-       if (jibp->flags & kJIJournalInFSMask) {
+       if (jib_flags & kJIJournalInFSMask) {
                 hfsmp->jvp = hfsmp->hfs_devvp;
+               jib_offset = SWAP_BE64(jibp->offset);
         } else {
-               printf("hfs: journal not stored in fs! don't know what to do.\n");
-               brelse(jinfo_bp);
-               return EINVAL;
+           int need_init=0;
+       
+           // if the volume was unmounted cleanly then we'll pick any
+           // available external journal partition
+           //
+           if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
+                   *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
+           }
+
+           hfsmp->jvp = open_journal_dev(dev_name,
+                                         !(jib_flags & kJIJournalNeedInitMask),
+                                         (char *)&jibp->ext_jnl_uuid[0],
+                                         (char *)&jibp->machine_serial_num[0],
+                                         jib_size,
+                                         hfsmp->hfs_logical_block_size,
+                                         &need_init);
+           if (hfsmp->jvp == NULL) {
+               buf_brelse(jinfo_bp);
+               return EROFS;
+           } else {
+                   if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
+                           strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
+                   }
+           }
+
+           jib_offset = 0;
+           write_jibp = 1;
+           if (need_init) {
+                   jib_flags |= kJIJournalNeedInitMask;
+           }
         }
  
         // save this off for the hack-y check in hfs_remove()
-       hfsmp->jnl_start = jibp->offset / SWAP_BE32(vhp->blockSize);
-       hfsmp->jnl_size  = jibp->size;
+       hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
+       hfsmp->jnl_size  = jib_size;
+
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
+           // if the file system is read-only, check if the journal is empty.
+           // if it is, then we can allow the mount.  otherwise we have to
+           // return failure.
+           retval = journal_is_clean(hfsmp->jvp,
+                                     jib_offset + embeddedOffset,
+                                     jib_size,
+                                     devvp,
+                                     hfsmp->hfs_logical_block_size);
+
+           hfsmp->jnl = NULL;
+
+           buf_brelse(jinfo_bp);
+
+           if (retval) {
+               const char *name = vnode_getname(devvp);
+             printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
+                     name ? name : "");
+               if (name)
+                       vnode_putname(name);
+           }
+
+           return retval;
+       }
  
-       if (jibp->flags & kJIJournalNeedInitMask) {
+       if (jib_flags & kJIJournalNeedInitMask) {
                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
-                          jibp->offset + (off_t)embeddedOffset, jibp->size);
+                          jib_offset + embeddedOffset, jib_size);
                 hfsmp->jnl = journal_create(hfsmp->jvp,
-                                                                       jibp->offset + (off_t)embeddedOffset,
-                                                                       jibp->size,
+                                                                       jib_offset + embeddedOffset,
+                                                                       jib_size,
                                                                         devvp,
                                                                         blksize,
                                                                         arg_flags,
@@ -1562,28 +2059,30 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
  
                 // no need to start a transaction here... if this were to fail
                 // we'd just re-init it on the next mount.
-               jibp->flags &= ~kJIJournalNeedInitMask;
-               jibp->flags  = SWAP_BE32(jibp->flags);
-               jibp->offset = SWAP_BE64(jibp->offset);
-               jibp->size   = SWAP_BE64(jibp->size);
-               bwrite(jinfo_bp);
+               jib_flags &= ~kJIJournalNeedInitMask;
+               jibp->flags  = SWAP_BE32(jib_flags);
+               buf_bwrite(jinfo_bp);
                 jinfo_bp = NULL;
                 jibp     = NULL;
         } else { 
                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
-               //         jibp->offset + (off_t)embeddedOffset,
-               //         jibp->size, SWAP_BE32(vhp->blockSize));
+               //         jib_offset + embeddedOffset,
+               //         jib_size, SWAP_BE32(vhp->blockSize));
                                 
                 hfsmp->jnl = journal_open(hfsmp->jvp,
-                                                                 jibp->offset + (off_t)embeddedOffset,
-                                                                 jibp->size,
+                                                                 jib_offset + embeddedOffset,
+                                                                 jib_size,
                                                                   devvp,
                                                                   blksize,
                                                                   arg_flags,
                                                                   arg_tbufsz,
                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
  
-               brelse(jinfo_bp);
+               if (write_jibp) {
+                       buf_bwrite(jinfo_bp);
+               } else {
+                       buf_brelse(jinfo_bp);
+               }
                 jinfo_bp = NULL;
                 jibp     = NULL;
  
@@ -1591,17 +2090,22 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                         // reload the mdb because it could have changed
                         // if the journal had to be replayed.
                         if (mdb_offset == 0) {
-                               mdb_offset = (embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize);
+                               mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
                         }
-                       retval = meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+                       bp = NULL;
+                       retval = (int)buf_meta_bread(devvp, 
+                                       HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+                                       hfsmp->hfs_physical_block_size, cred, &bp);
                         if (retval) {
-                               brelse(bp);
+                               if (bp) {
+                                       buf_brelse(bp);
+                               }
                                 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
                                            retval);
                                 return retval;
                         }
-                       bcopy(bp->b_data + HFS_PRI_OFFSET(blksize), mdbp, 512);
-                       brelse(bp);
+                       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
+                       buf_brelse(bp);
                         bp = NULL;
                 }
         }
@@ -1638,15 +2142,18 @@ static int
  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
  {
         JournalInfoBlock *jibp;
-       struct buf       *jinfo_bp, *bp;
+       struct buf       *jinfo_bp;
         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
-       int               retval, need_flush = 0, write_jibp = 0;
+       int               retval, write_jibp = 0, recreate_journal = 0;
         struct vnode     *devvp;
         struct cat_attr   jib_attr, jattr;
         struct cat_fork   jib_fork, jfork;
         ExtendedVCB      *vcb;
-       u_long            fid;
+       u_int32_t            fid;
         struct hfs_mount_args *args = _args;
+       u_int32_t         jib_flags;
+       u_int64_t         jib_offset;
+       u_int64_t         jib_size;
         
         devvp = hfsmp->hfs_devvp;
         vcb = HFSTOVCB(hfsmp);
@@ -1676,80 +2183,143 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
  
                 vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
                 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
+               recreate_journal = 1;
         }
  
  
-       sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_phys_block_size;
-       retval = meta_bread(devvp,
-                                               vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size + 
-                                               (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock),
-                                               SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp);
+       sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
+       jinfo_bp = NULL;
+       retval = (int)buf_meta_bread(devvp,
+                                               (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + 
+                                               ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
+                                               hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
         if (retval) {
+               if (jinfo_bp) {
+                       buf_brelse(jinfo_bp);
+               }
                 printf("hfs: can't read journal info block. disabling journaling.\n");
                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
                 return 0;
         }
  
-       jibp = (JournalInfoBlock *)jinfo_bp->b_data;
-       jibp->flags  = SWAP_BE32(jibp->flags);
-       jibp->offset = SWAP_BE64(jibp->offset);
-       jibp->size   = SWAP_BE64(jibp->size);
+       jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
+       jib_flags  = SWAP_BE32(jibp->flags);
+       jib_offset = SWAP_BE64(jibp->offset);
+       jib_size   = SWAP_BE64(jibp->size);
  
         fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
         if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
                 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
                            jfork.cf_extents[0].startBlock);
-               brelse(jinfo_bp);
+               buf_brelse(jinfo_bp);
                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
                 return 0;
         }
         hfsmp->hfs_jnlfileid = fid;
  
         // make sure the journal file begins where we think it should.
-       if ((jibp->offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
+       if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
                 printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
-                          (jibp->offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
+                          (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
  
-               jibp->offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
+               jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
                 write_jibp   = 1;
+               recreate_journal = 1;
         }
  
         // check the size of the journal file.
-       if (jibp->size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
+       if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
                 printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
-                          jibp->size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
+                          jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
                 
-               jibp->size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
+               jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
                 write_jibp = 1;
+               recreate_journal = 1;
         }
         
-       if (jibp->flags & kJIJournalInFSMask) {
+       if (jib_flags & kJIJournalInFSMask) {
                 hfsmp->jvp = hfsmp->hfs_devvp;
+               jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
         } else {
-               printf("hfs: journal not stored in fs! don't know what to do.\n");
-               brelse(jinfo_bp);
-               return EINVAL;
+           const char *dev_name;
+           int need_init = 0;
+
+           dev_name = vnode_name(devvp);
+           if (dev_name == NULL) {
+                   dev_name = "unknown-dev";
+           }
+
+            // since the journal is empty, just use any available external journal
+           *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
+
+           // this fills in the uuid of the device we actually get
+           hfsmp->jvp = open_journal_dev(dev_name,
+                                         !(jib_flags & kJIJournalNeedInitMask),
+                                         (char *)&jibp->ext_jnl_uuid[0],
+                                         (char *)&jibp->machine_serial_num[0],
+                                         jib_size,
+                                         hfsmp->hfs_logical_block_size,
+                                         &need_init);
+           if (hfsmp->jvp == NULL) {
+               buf_brelse(jinfo_bp);
+               return EROFS;
+           } else {
+                   if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
+                           strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
+                   }
+           } 
+           jib_offset = 0;
+           recreate_journal = 1;
+           write_jibp = 1;
+           if (need_init) {
+                   jib_flags |= kJIJournalNeedInitMask;
+           }
         }
  
         // save this off for the hack-y check in hfs_remove()
-       hfsmp->jnl_start = jibp->offset / SWAP_BE32(vhp->blockSize);
-       hfsmp->jnl_size  = jibp->size;
+       hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
+       hfsmp->jnl_size  = jib_size;
+
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
+           // if the file system is read-only, check if the journal is empty.
+           // if it is, then we can allow the mount.  otherwise we have to
+           // return failure.
+           retval = journal_is_clean(hfsmp->jvp,
+                                     jib_offset,
+                                     jib_size,
+                                     devvp,
+                                     hfsmp->hfs_logical_block_size);
+
+           hfsmp->jnl = NULL;
+
+           buf_brelse(jinfo_bp);
+
+           if (retval) {
+               const char *name = vnode_getname(devvp);
+             printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
+                    name ? name : "");
+               if (name)
+                       vnode_putname(name);
+           }
  
-       if (jibp->flags & kJIJournalNeedInitMask) {
+           return retval;
+       }
+
+       if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
-                          jibp->offset + (off_t)vcb->hfsPlusIOPosOffset, jibp->size);
+                          jib_offset, jib_size);
                 hfsmp->jnl = journal_create(hfsmp->jvp,
-                                                                       jibp->offset + (off_t)vcb->hfsPlusIOPosOffset,
-                                                                       jibp->size,
+                                                                       jib_offset,
+                                                                       jib_size,
                                                                         devvp,
-                                                                       hfsmp->hfs_phys_block_size,
+                                                                       hfsmp->hfs_logical_block_size,
                                                                         arg_flags,
                                                                         arg_tbufsz,
                                                                         hfs_sync_metadata, hfsmp->hfs_mp);
  
                 // no need to start a transaction here... if this were to fail
                 // we'd just re-init it on the next mount.
-               jibp->flags &= ~kJIJournalNeedInitMask;
+               jib_flags &= ~kJIJournalNeedInitMask;
                 write_jibp   = 1;
  
         } else { 
@@ -1764,14 +2334,14 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                 arg_flags |= JOURNAL_RESET;
                 
                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
-               //         jibp->offset + (off_t)vcb->hfsPlusIOPosOffset,
-               //         jibp->size, SWAP_BE32(vhp->blockSize));
+               //         jib_offset,
+               //         jib_size, SWAP_BE32(vhp->blockSize));
                                 
                 hfsmp->jnl = journal_open(hfsmp->jvp,
-                                                                 jibp->offset + (off_t)vcb->hfsPlusIOPosOffset,
-                                                                 jibp->size,
+                                                                 jib_offset,
+                                                                 jib_size,
                                                                   devvp,
-                                                                 hfsmp->hfs_phys_block_size,
+                                                                 hfsmp->hfs_logical_block_size,
                                                                   arg_flags,
                                                                   arg_tbufsz,
                                                                   hfs_sync_metadata, hfsmp->hfs_mp);
@@ -1779,18 +2349,18 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                         
  
         if (write_jibp) {
-               jibp->flags  = SWAP_BE32(jibp->flags);
-               jibp->offset = SWAP_BE64(jibp->offset);
-               jibp->size   = SWAP_BE64(jibp->size);
+               jibp->flags  = SWAP_BE32(jib_flags);
+               jibp->offset = SWAP_BE64(jib_offset);
+               jibp->size   = SWAP_BE64(jib_size);
  
-               bwrite(jinfo_bp);
+               buf_bwrite(jinfo_bp);
         } else {
-               brelse(jinfo_bp);
+               buf_brelse(jinfo_bp);
         } 
         jinfo_bp = NULL;
         jibp     = NULL;
  
-       //printf("journal @ 0x%x\n", hfsmp->jnl);
+       //printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
         
         // if we expected the journal to be there and we couldn't
         // create it or open it then we have to bail out.
@@ -1835,13 +2405,12 @@ static void
  hfs_metadatazone_init(struct hfsmount *hfsmp)
  {
         ExtendedVCB  *vcb;
-       struct BTreeInfoRec btinfo;
         u_int64_t  fs_size;
         u_int64_t  zonesize;
         u_int64_t  temp;
         u_int64_t  filesize;
         u_int32_t  blk;
-       int  items;
+       int  items, really_do_it=1;
  
         vcb = HFSTOVCB(hfsmp);
         fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks;
@@ -1849,50 +2418,65 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         /*
          * For volumes less than 10 GB, don't bother.
          */
-       if (fs_size < ((u_int64_t)10 * GIGABYTE))
-               return;
+       if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
+               really_do_it = 0;
+       }
+       
         /*
          * Skip non-journaled volumes as well.
          */
-       if (hfsmp->jnl == NULL)
-               return;
+       if (hfsmp->jnl == NULL) {
+               really_do_it = 0;
+       }
  
         /*
-        * Start with allocation bitmap (a fixed size).
+        * Start with space for the boot blocks and Volume Header.
+        * 1536 = byte offset from start of volume to end of volume header:
+        * 1024 bytes is the offset from the start of the volume to the
+        * start of the volume header (defined by the volume format)
+        * + 512 bytes (the size of the volume header).
          */
-       zonesize = roundup(vcb->totalBlocks / 8, vcb->vcbVBMIOSize);
-
+       zonesize = roundup(1536, hfsmp->blockSize);
+       
         /*
-        * Overflow Extents file gets 4 MB per 100 GB.
+        * Add the on-disk size of allocation bitmap.
          */
-       items = fs_size / ((u_int64_t)100 * GIGABYTE);
-       filesize = (u_int64_t)(items + 1) * OVERFLOW_DEFAULT_SIZE;
-       if (filesize > OVERFLOW_MAXIMUM_SIZE)
-               filesize = OVERFLOW_MAXIMUM_SIZE;
-       zonesize += filesize;
-       hfsmp->hfs_overflow_maxblks = filesize / vcb->blockSize;
-
+       zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+       
+       /* 
+        * Add space for the Journal Info Block and Journal (if they're in
+        * this file system).
+        */
+       if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
+               zonesize += hfsmp->blockSize + hfsmp->jnl_size;
+       }
+       
         /*
-        * Plan for at least 8 MB of journal for each
-        * 100 GB of disk space (up to a 512 MB).
+        * Add the existing size of the Extents Overflow B-tree.
+        * (It rarely grows, so don't bother reserving additional room for it.)
          */
-       items = fs_size / ((u_int64_t)100 * GIGABYTE);
-       filesize = (u_int64_t)(items + 1) * JOURNAL_DEFAULT_SIZE;
-       if (filesize > JOURNAL_MAXIMUM_SIZE)
-               filesize = JOURNAL_MAXIMUM_SIZE;
-       zonesize += filesize;
-
+       zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
+       
         /*
-        * Catalog file gets 10 MB per 1 GB.
-        *
-        * How about considering the current catalog size (used nodes * node size)
-        * and the current file data size to help estimate the required
-        * catalog size.
+        * If there is an Attributes B-tree, leave room for 11 clumps worth.
+        * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
+        * When installing a full OS install onto a 20GB volume, we use
+        * 7 to 8 clumps worth of space (depending on packages), so that leaves
+        * us with another 3 or 4 clumps worth before we need another extent.
          */
-       filesize = MIN((fs_size / 1024) * 10, GIGABYTE);
-       hfsmp->hfs_catalog_maxblks = filesize / vcb->blockSize;
-       zonesize += filesize;
-
+       if (hfsmp->hfs_attribute_cp) {
+               zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
+       }
+       
+       /*
+        * Leave room for 11 clumps of the Catalog B-tree.
+        * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
+        * When installing a full OS install onto a 20GB volume, we use
+        * 7 to 8 clumps worth of space (depending on packages), so that leaves
+        * us with another 3 or 4 clumps worth before we need another extent.
+        */
+       zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
+       
         /*
          * Add space for hot file region.
          *
@@ -1906,39 +2490,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         /*
          * Calculate user quota file requirements.
          */
-       items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
-       if (items < QF_MIN_USERS)
-               items = QF_MIN_USERS;
-       else if (items > QF_MAX_USERS)
-               items = QF_MAX_USERS;
-       if (!powerof2(items)) {
-               int x = items;
-               items = 4;
-               while (x>>1 != 1) {
-                       x = x >> 1;
-                       items = items << 1;
+       if (hfsmp->hfs_flags & HFS_QUOTAS) {
+               items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
+               if (items < QF_MIN_USERS)
+                       items = QF_MIN_USERS;
+               else if (items > QF_MAX_USERS)
+                       items = QF_MAX_USERS;
+               if (!powerof2(items)) {
+                       int x = items;
+                       items = 4;
+                       while (x>>1 != 1) {
+                               x = x >> 1;
+                               items = items << 1;
+                       }
                 }
-       }
-       filesize += (items + 1) * sizeof(struct dqblk);
-       /*
-        * Calculate group quota file requirements.
-        *
-        */
-       items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
-       if (items < QF_MIN_GROUPS)
-               items = QF_MIN_GROUPS;
-       else if (items > QF_MAX_GROUPS)
-               items = QF_MAX_GROUPS;
-       if (!powerof2(items)) {
-               int x = items;
-               items = 4;
-               while (x>>1 != 1) {
-                       x = x >> 1;
-                       items = items << 1;
+               filesize += (items + 1) * sizeof(struct dqblk);
+               /*
+                * Calculate group quota file requirements.
+                *
+                */
+               items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
+               if (items < QF_MIN_GROUPS)
+                       items = QF_MIN_GROUPS;
+               else if (items > QF_MAX_GROUPS)
+                       items = QF_MAX_GROUPS;
+               if (!powerof2(items)) {
+                       int x = items;
+                       items = 4;
+                       while (x>>1 != 1) {
+                               x = x >> 1;
+                               items = items << 1;
+                       }
                 }
+               filesize += (items + 1) * sizeof(struct dqblk);
         }
-       filesize += (items + 1) * sizeof(struct dqblk);
-       hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
         zonesize += filesize;
  
         /*
@@ -1946,11 +2531,29 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
          * The extra space goes to the catalog file and hot file area.
          */
         temp = zonesize;
-       zonesize = roundup(zonesize, vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+       zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
+       hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
+       /*
+        * If doing the round up for hfs_min_alloc_start would push us past
+        * totalBlocks, then just reset it back to 0.  Though using a value 
+        * bigger than totalBlocks would not cause damage in the block allocator
+        * code, this value could get stored in the volume header and make it out 
+        * to disk, making the volume header technically corrupt.
+        */
+       if (hfsmp->hfs_min_alloc_start >= hfsmp->totalBlocks) {
+               hfsmp->hfs_min_alloc_start = 0;
+       }
+
+       if (really_do_it == 0) {
+               return;
+       }
+       
         temp = zonesize - temp;  /* temp has extra space */
         filesize += temp / 3;
         hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
  
+       hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
+
         /* Convert to allocation blocks. */
         blk = zonesize / vcb->blockSize;
  
@@ -1963,9 +2566,9 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
         hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
         hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
  #if 0
-       printf("HFS: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
-       printf("HFS: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
-       printf("HFS: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
+       printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
+       printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
+       printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
  #endif
         hfsmp->hfs_flags |= HFS_METADATA_ZONE;
  }
@@ -1975,15 +2578,19 @@ static u_int32_t
  hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
  {
         ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
+       int  lockflags;
         int  freeblocks;
  
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
         freeblocks = MetaZoneFreeBlocks(vcb);
+       hfs_systemfile_unlock(hfsmp, lockflags);
+
         /* Minus Extents overflow file reserve. */
         freeblocks -=
-               hfsmp->hfs_overflow_maxblks - VTOF(vcb->extentsRefNum)->ff_blocks;
+               hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
         /* Minus catalog file reserve. */
         freeblocks -=
-               hfsmp->hfs_catalog_maxblks - VTOF(vcb->catalogRefNum)->ff_blocks;
+               hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
         if (freeblocks < 0)
                 freeblocks = 0;
  
@@ -1998,23 +2605,249 @@ __private_extern__
  int
  hfs_virtualmetafile(struct cnode *cp)
  {
-       char * filename;
+       const char * filename;
  
  
         if (cp->c_parentcnid != kHFSRootFolderID)
                 return (0);
  
-       filename = cp->c_desc.cd_nameptr;
+       filename = (const char *)cp->c_desc.cd_nameptr;
         if (filename == NULL)
                 return (0);
  
-       if ((strcmp(filename, ".journal") == 0) ||
-           (strcmp(filename, ".journal_info_block") == 0) ||
-           (strcmp(filename, ".quota.user") == 0) ||
-           (strcmp(filename, ".quota.group") == 0) ||
-           (strcmp(filename, ".hotfiles.btree") == 0))
+       if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
+           (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
+           (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
+           (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
+           (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
                 return (1);
  
         return (0);
  }
  
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+ __private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+       if (hfsmp->hfs_syncer)  {
+               clock_sec_t secs;
+               clock_usec_t usecs;
+               uint64_t now;
+
+               clock_get_calendar_microtime(&secs, &usecs);
+               now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
+
+               if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
+                       // if we have a sync scheduled but i/o is starting to pile up,
+                       // don't call thread_call_enter_delayed() again because that
+                       // will defer the sync.
+                       return;
+               }
+
+               if (hfsmp->hfs_sync_scheduled == 0) {
+                       uint64_t deadline;
+
+                       hfsmp->hfs_last_sync_request_time = now;
+
+                       clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
+
+                       /*
+                        * Increment hfs_sync_scheduled on the assumption that we're the
+                        * first thread to schedule the timer.  If some other thread beat
+                        * us, then we'll decrement it.  If we *were* the first to
+                        * schedule the timer, then we need to keep track that the
+                        * callback is waiting to complete.
+                        */
+                       OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+                       if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
+                               OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+                       else
+                               OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+               }               
+       }
+}
+
+
+__private_extern__
+int
+hfs_start_transaction(struct hfsmount *hfsmp)
+{
+       int ret, unlock_on_err=0;
+       void * thread = current_thread();
+
+#ifdef HFS_CHECK_LOCK_ORDER
+       /*
+        * You cannot start a transaction while holding a system
+        * file lock. (unless the transaction is nested.)
+        */
+       if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
+               if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
+                       panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
+               }
+               if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
+                       panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
+               }
+               if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
+                       panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
+               }
+       }
+#endif /* HFS_CHECK_LOCK_ORDER */
+
+    if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
+       lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+       OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+       unlock_on_err = 1;
+    }
+
+       /* If a downgrade to read-only mount is in progress, only the
+        * downgrade process itself is allowed to modify
+        * the file system.
+        */
+       if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) && 
+                       (hfsmp->hfs_downgrading_proc != thread)) {
+               ret = EROFS;
+               goto out;
+       }
+
+    if (hfsmp->jnl) {
+       ret = journal_start_transaction(hfsmp->jnl);
+       if (ret == 0) {
+           OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+       }
+    } else {
+       ret = 0;
+    }
+
+out:
+    if (ret != 0 && unlock_on_err) {
+       lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+       OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+    }
+
+    return ret;
+}
+
+__private_extern__
+int
+hfs_end_transaction(struct hfsmount *hfsmp)
+{
+    int need_unlock=0, ret;
+
+    if (    hfsmp->jnl == NULL
+       || (   journal_owner(hfsmp->jnl) == current_thread()
+           && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
+
+           need_unlock = 1;
+    } 
+
+    if (hfsmp->jnl) {
+       ret = journal_end_transaction(hfsmp->jnl);
+    } else {
+       ret = 0;
+    }
+
+    if (need_unlock) {
+       OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+       lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+       hfs_sync_ejectable(hfsmp);
+    }
+
+    return ret;
+}
+
+
+__private_extern__
+int
+hfs_journal_flush(struct hfsmount *hfsmp)
+{
+       int ret;
+
+       if (hfsmp->jnl) {
+               lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+               ret = journal_flush(hfsmp->jnl);
+               lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+       } else {
+               ret = 0;
+       }
+
+       return ret;
+}
+
+
+/*
+ * hfs_erase_unused_nodes
+ *
+ * Check whether a volume may suffer from unused Catalog B-tree nodes that
+ * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
+ * zeroes to the unused nodes.
+ *
+ * How do we detect when a volume needs this repair?  We can't always be
+ * certain.  If a volume was created after a certain date, then it may have
+ * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
+ * clump, we can assume that if a Catalog B-tree is larger than its clump size,
+ * that means that the entire first clump must have been written to, which means
+ * there shouldn't be unused and unwritten nodes in that first clump, and this
+ * repair is not needed.
+ *
+ * We have defined a bit in the Volume Header's attributes to indicate when the
+ * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
+ * As will fsck_hfs when it repairs the unused nodes.
+ */
+__private_extern__
+int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
+{
+       int result; 
+       struct filefork *catalog;
+       int lockflags;
+       
+       if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
+       {
+               /* This volume has already been checked and repaired. */
+               return 0;
+       }
+
+       if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
+       {
+               /* This volume is too old to have had the problem. */
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+               return 0;
+       }
+
+       catalog = hfsmp->hfs_catalog_cp->c_datafork;
+       if (catalog->ff_size > catalog->ff_clumpsize)
+       {
+               /* The entire first clump must have been in use at some point. */
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+               return 0;
+       }
+       
+       /*
+        * If we get here, we need to zero out those unused nodes.
+        *
+        * We start a transaction and lock the catalog since we're going to be
+        * making on-disk changes.  But note that BTZeroUnusedNodes doesn't actually
+        * do its writing via the journal, because that would be too much I/O
+        * to fit in a transaction, and it's a pain to break it up into multiple
+        * transactions.  (It behaves more like growing a B-tree would.)
+        */
+       printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
+       result = hfs_start_transaction(hfsmp);
+       if (result)
+               goto done;
+       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+       result = BTZeroUnusedNodes(catalog);
+       vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
+       hfs_systemfile_unlock(hfsmp, lockflags);
+       hfs_end_transaction(hfsmp);
+       if (result == 0)
+               hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
+       printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
+
+done:
+       return result;
+}