From ccb1535577c019312b69b95a60bb75c8a3ee22a3 Mon Sep 17 00:00:00 2001 From: Apple Date: Fri, 1 May 2020 18:26:57 +0000 Subject: [PATCH] hfs-522.100.5.tar.gz --- core/.open_source_exclude | 2 + core/BTree.c | 2088 +++++ core/BTreeAllocate.c | 748 ++ core/BTreeMiscOps.c | 676 ++ core/BTreeNodeOps.c | 1036 +++ core/BTreeNodeReserve.c | 336 + core/BTreeScanner.c | 410 + core/BTreeScanner.h | 122 + core/BTreeTreeOps.c | 1338 +++ core/BTreeWrapper.c | 278 + core/BTreesInternal.h | 368 + core/BTreesPrivate.h | 516 ++ core/CatalogPrivate.h | 129 + core/CatalogUtilities.c | 343 + core/FileExtentMapping.c | 2249 +++++ core/FileIDsServices.c | 794 ++ core/FileMgrInternal.h | 397 + core/HFSUnicodeWrappers.h | 120 + core/MacOSStubs.c | 143 + core/UCStringCompareData.h | 329 + core/UnicodeWrappers.c | 508 ++ core/VolumeAllocation.c | 6198 ++++++++++++++ core/hfs.h | 1171 +++ core/hfs_alloc_trace.h | 34 + core/hfs_attrlist.c | 1743 ++++ core/hfs_attrlist.h | 108 + core/hfs_btreeio.c | 948 ++ core/hfs_btreeio.h | 59 + core/hfs_catalog.c | 4813 +++++++++++ core/hfs_catalog.h | 512 ++ core/hfs_chash.c | 578 ++ core/hfs_cnode.c | 2561 ++++++ core/hfs_cnode.h | 630 ++ core/hfs_cprotect.c | 2773 ++++++ core/hfs_cprotect.h | 424 + core/hfs_dbg.h | 92 + core/hfs_endian.c | 1227 +++ core/hfs_endian.h | 105 + core/hfs_extents.c | 771 ++ core/hfs_extents.h | 74 + core/hfs_format.h | 818 ++ core/hfs_fsctl.h | 387 + core/hfs_fsinfo.c | 889 ++ core/hfs_hotfiles.c | 3929 +++++++++ core/hfs_hotfiles.h | 136 + core/hfs_iokit.cpp | 307 + core/hfs_iokit.h | 57 + core/hfs_journal.c | 4892 +++++++++++ core/hfs_journal.h | 378 + core/hfs_kdebug.h | 114 + core/hfs_link.c | 1419 +++ core/hfs_lookup.c | 680 ++ core/hfs_macos_defs.h | 299 + core/hfs_mount.h | 83 + core/hfs_notification.c | 198 + core/hfs_quota.c | 1014 +++ core/hfs_quota.h | 111 + core/hfs_readwrite.c | 5876 +++++++++++++ core/hfs_resize.c | 3432 ++++++++ core/hfs_search.c | 1395 +++ core/hfs_unistr.h | 64 + core/hfs_vfsops.c | 4751 ++++++++++ core/hfs_vfsutils.c | 4462 ++++++++++ core/hfs_vnops.c | 7622 +++++++++++++++++ core/hfs_xattr.c | 2633 ++++++ core/install | 35 + core/iphoneos-Info.plist | 59 + core/kext-config.h | 56 + core/kext.xcconfig | 52 + core/macosx-Info.plist | 61 + core/mk-root.sh | 39 + core/rangelist.c | 429 + core/rangelist.h | 86 + hfs.xcodeproj/project.pbxproj | 3 +- .../xcschemes/livefiles_hfs_tester.xcscheme | 8 +- livefiles_hfs_plugin/lf_hfs_btree_node_ops.c | 2 +- livefiles_hfs_plugin/lf_hfs_btree_tree_ops.c | 2 +- livefiles_hfs_plugin/lf_hfs_chash.c | 7 +- livefiles_hfs_plugin/lf_hfs_cnode.c | 203 +- livefiles_hfs_plugin/lf_hfs_common.h | 7 +- livefiles_hfs_plugin/lf_hfs_dirops_handler.c | 2 +- livefiles_hfs_plugin/lf_hfs_endian.c | 5 +- .../lf_hfs_file_extent_mapping.c | 3 +- livefiles_hfs_plugin/lf_hfs_fileops_handler.c | 102 + livefiles_hfs_plugin/lf_hfs_fileops_handler.h | 3 + livefiles_hfs_plugin/lf_hfs_fsops_handler.c | 31 +- livefiles_hfs_plugin/lf_hfs_raw_read_write.c | 7 +- livefiles_hfs_plugin/lf_hfs_readwrite_ops.c | 141 + livefiles_hfs_plugin/lf_hfs_readwrite_ops.h | 1 + livefiles_hfs_plugin/lf_hfs_vfsops.c | 1 + livefiles_hfs_plugin/lf_hfs_vnode.c | 70 +- livefiles_hfs_plugin/lf_hfs_vnode.h | 13 +- livefiles_hfs_plugin/lf_hfs_vnops.c | 188 +- livefiles_hfs_plugin/lf_hfs_vnops.h | 2 + make_opensource.sh | 118 - 95 files changed, 85154 insertions(+), 279 deletions(-) create mode 100644 core/.open_source_exclude create mode 100644 core/BTree.c create mode 100644 core/BTreeAllocate.c create mode 100644 core/BTreeMiscOps.c 
create mode 100644 core/BTreeNodeOps.c create mode 100644 core/BTreeNodeReserve.c create mode 100644 core/BTreeScanner.c create mode 100644 core/BTreeScanner.h create mode 100644 core/BTreeTreeOps.c create mode 100644 core/BTreeWrapper.c create mode 100644 core/BTreesInternal.h create mode 100644 core/BTreesPrivate.h create mode 100644 core/CatalogPrivate.h create mode 100644 core/CatalogUtilities.c create mode 100644 core/FileExtentMapping.c create mode 100644 core/FileIDsServices.c create mode 100644 core/FileMgrInternal.h create mode 100644 core/HFSUnicodeWrappers.h create mode 100644 core/MacOSStubs.c create mode 100644 core/UCStringCompareData.h create mode 100644 core/UnicodeWrappers.c create mode 100644 core/VolumeAllocation.c create mode 100644 core/hfs.h create mode 100644 core/hfs_alloc_trace.h create mode 100644 core/hfs_attrlist.c create mode 100644 core/hfs_attrlist.h create mode 100644 core/hfs_btreeio.c create mode 100644 core/hfs_btreeio.h create mode 100644 core/hfs_catalog.c create mode 100644 core/hfs_catalog.h create mode 100644 core/hfs_chash.c create mode 100644 core/hfs_cnode.c create mode 100644 core/hfs_cnode.h create mode 100644 core/hfs_cprotect.c create mode 100644 core/hfs_cprotect.h create mode 100644 core/hfs_dbg.h create mode 100644 core/hfs_endian.c create mode 100644 core/hfs_endian.h create mode 100644 core/hfs_extents.c create mode 100644 core/hfs_extents.h create mode 100644 core/hfs_format.h create mode 100644 core/hfs_fsctl.h create mode 100644 core/hfs_fsinfo.c create mode 100644 core/hfs_hotfiles.c create mode 100644 core/hfs_hotfiles.h create mode 100644 core/hfs_iokit.cpp create mode 100644 core/hfs_iokit.h create mode 100644 core/hfs_journal.c create mode 100644 core/hfs_journal.h create mode 100644 core/hfs_kdebug.h create mode 100644 core/hfs_link.c create mode 100644 core/hfs_lookup.c create mode 100644 core/hfs_macos_defs.h create mode 100644 core/hfs_mount.h create mode 100644 core/hfs_notification.c create mode 100644 core/hfs_quota.c create mode 100644 core/hfs_quota.h create mode 100644 core/hfs_readwrite.c create mode 100644 core/hfs_resize.c create mode 100644 core/hfs_search.c create mode 100644 core/hfs_unistr.h create mode 100644 core/hfs_vfsops.c create mode 100644 core/hfs_vfsutils.c create mode 100644 core/hfs_vnops.c create mode 100644 core/hfs_xattr.c create mode 100755 core/install create mode 100644 core/iphoneos-Info.plist create mode 100644 core/kext-config.h create mode 100644 core/kext.xcconfig create mode 100644 core/macosx-Info.plist create mode 100755 core/mk-root.sh create mode 100644 core/rangelist.c create mode 100644 core/rangelist.h delete mode 100755 make_opensource.sh diff --git a/core/.open_source_exclude b/core/.open_source_exclude new file mode 100644 index 0000000..0c36529 --- /dev/null +++ b/core/.open_source_exclude @@ -0,0 +1,2 @@ +hfs_key_roll.c +hfs_key_roll.h diff --git a/core/BTree.c b/core/BTree.c new file mode 100644 index 0000000..cd7803d --- /dev/null +++ b/core/BTree.c @@ -0,0 +1,2088 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTree.c + + Contains: Implementation of public interface routines for B-tree manager. + + Version: HFS Plus 1.0 + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (msd) Mark Day + (DSH) Deric Horn + (djb) Don Brady + + Change History (most recent first): + 9/22/99 ser Added routines BTGetLastSync and BTSetLastSync + 6/1/99 djb Sync up with Mac OS 8.6. + 6/30/98 djb In BTOpenPath make sure nodes are contiguous on disk (radar #2249539). + 4/15/98 djb In BTOpenPath need to clear nodeRec.buffer if GetBlockProc fails. + 4/11/98 djb Add RequireFileLock checking to all external entry points. + + 03/23/98 djb In BTOpenPath use kTrashBlock option when releasing the header so + that we get a full node when we call GetNode. + + 12/12/97 djb Radar #2202682, BTIterateRecord with kBTreeCurrentRecord was not + checking if we had a record and could call BlockMove with an + uninitialize source pointer (causing a bus error). + 10/24/97 msd In BTIterateRecord, when moving to the previous or next record + and we have to move to another node, see if we need to release + the node about to be "shifted out" (opposite sibling of the + direction we need to move). + 7/25/97 DSH BTSearchRecord now takes a heuristicHint, nodeNum, and tries it + before calling SearchBTree + 7/24/97 djb GetBlockProc now take a file refnum instead of an FCB ptr. + 7/22/97 djb Move trace points from BTreeWrapper.c to here. + 7/21/97 djb LogEndTime now takes an error code. + 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name + collision + 5/19/97 djb Add summary traces to BTIterateRecord. + 4/23/97 djb first checked in + + 2/19/97 djb Enable variable sized index keys for HFS+ volumes. Added node + cache to support nodes larger than 512 bytes. + 1/27/97 djb Calls to InsertTree and DeleteTree are now recursive (to support + variable sized index keys). + 1/13/97 djb Added support for getting current record to BTIterateRecord. + 1/6/97 djb Initialize "BigKeys" attribute in BTOpen. + 1/3/97 djb Added support for large keys. + 12/23/96 djb On exit map fsBTEmptyErr and fsBTEndOfIterationErr to + fsBTRecordNotFoundErr. + 12/19/96 djb first checked in + + History applicable to original Scarecrow Design: + + <13> 10/25/96 ser Changing for new VFPI + <12> 10/18/96 ser Converting over VFPI changes + <11> 9/17/96 dkh More BTree statistics. 
Modified hint checks to not bail out when + an error is returned from GetNode. + <10> 9/16/96 dkh Revised BTree statistics. + <9> 8/23/96 dkh Remove checks for multiple paths to BTree file. Need to add + equivalent mechanism later. + <8> 6/20/96 dkh Radar #1358740. Switch from using Pools to debug MemAllocators. + <7> 3/14/96 jev Fix BTreeSetRecord, recordFound was not set for the case of a + simple replace causing the leafRecords count to get bumped even + though we didn't have to add a record. + <6> 3/1/96 prp Fix lint problems. Bug in BTSetRecord that does not initialize + recordFound. + <5> 1/22/96 dkh Add #include Memory.h + <4> 1/10/96 msd Use the real function names from Math64.i. + <3> 1/4/96 jev Fix BTItererateRecord for the condition when the iterator + position routine does not find the record and we are looking for + the next record. In such a case, if the node's forrward link is + non-zero, we have to keep iterating next and not return + fsBTEndOfIterationErr error. + <2> 12/7/95 dkh D10E2 build. Changed usage of Ref data type to LogicalAddress. + <1> 10/18/95 rst Moved from Scarecrow project. + + <24> 7/18/95 mbb Change MoveData & ClearBytes to BlockMoveData & BlockZero. + <23> 1/31/95 prp GetBlockProc interface uses a 64 bit node number. + <22> 1/12/95 wjk Adopt Model FileSystem changes in D5. + <21> 11/16/94 prp Add IsItAHint routine and use it whenever hint's node number was + used for testing. + <20> 11/10/94 prp BTGetInfo name collides with the same name in FileManagerPriv.i. + Change it to BTGetInformation. + <19> 9/30/94 prp Get in sync with D2 interface changes. + <18> 7/22/94 wjk Convert to the new set of header files. + <17> 12/9/93 wjk Cleanup usage of char, Byte, int8, UInt8, etc. + <16> 12/2/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <15> 11/30/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <14> 9/30/93 gs Rename E_NoGetNodeProc and E_NoReleaseNodeProc to + E_NoXxxxBlockProc. + <13> 8/31/93 prp Use Set64U instead of Set64. + <12> 8/16/93 prp In BTSearchRecord, if the input hint found the node and record, + set the local nodeNum variable correctly so that the resultant + iterator gets set correctly. + <11> 7/1/93 gs Fix bug in BTIterateRecord related to kBTreePrevRecord + operation. + <10> 6/2/93 gs Update for changes to FSErrors.h and add some comments. + <9> 5/24/93 gs Fix bug in BTInsert/Set/ReplaceRecord which didn't set node hint + properly in some cases. + <8> 5/24/93 gs Do NOT map fsBTEmptyErr to fsBTRecordNotFoundErr in BTSearchRecord. + <7> 5/24/93 gs Rename BTFlush to BTFlushPath. + <6> 5/21/93 gs Add hint optimization to Set/Replace routines. + <5> 5/10/93 gs Remove Panic from BTInitialize for small logicalEOF. Implement + Insert, Set, Replace, and Delete. + <4> 3/23/93 gs Finish BTInitialize. + <3> 2/8/93 gs Implement BTSearchRecord and BTIterateRecord. + <2> 12/8/92 gs Implement Open and Close routines. + <1> 11/15/92 gs first checked in + +*/ + +#include "BTreesPrivate.h" +#include "hfs_btreeio.h" + +//////////////////////////////////// Globals //////////////////////////////////// + + +/////////////////////////// BTree Module Entry Points /////////////////////////// + + + +/*------------------------------------------------------------------------------- +Routine: BTOpenPath - Open a file for access as a B*Tree. + +Function: Create BTree control block for a file, if necessary. Validates the + file to be sure it looks like a BTree file. 
+ + +Input: filePtr - pointer to file to open as a B-tree + keyCompareProc - pointer to client's KeyCompare function + +Result: noErr - success + paramErr - required ptr was nil + fsBTInvalidFileErr - + memFullErr - + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + BTHeaderRec *header; + NodeRec nodeRec; + + ////////////////////// Preliminary Error Checking /////////////////////////// + + if ( filePtr == nil ) + { + return paramErr; + } + + /* + * Subsequent opens allow key compare proc to be changed. + */ + if ( filePtr->fcbBTCBPtr != nil && keyCompareProc != nil) { + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + btreePtr->keyCompareProc = keyCompareProc; + return noErr; + } + + if ( filePtr->fcbEOF < kMinNodeSize ) + return fsBTInvalidFileErr; + + + //////////////////////// Allocate Control Block ///////////////////////////// + + btreePtr = hfs_mallocz(sizeof(BTreeControlBlock)); + + btreePtr->getBlockProc = GetBTreeBlock; + btreePtr->releaseBlockProc = ReleaseBTreeBlock; + btreePtr->setEndOfForkProc = ExtendBTreeFile; + btreePtr->keyCompareProc = keyCompareProc; + + /////////////////////////// Read Header Node //////////////////////////////// + + nodeRec.buffer = nil; // so we can call ReleaseNode + btreePtr->fileRefNum = GetFileRefNumFromFCB(filePtr); + filePtr->fcbBTCBPtr = (Ptr) btreePtr; // attach btree cb to file + + /* Prefer doing I/O a physical block at a time */ + nodeRec.blockSize = VTOHFS(btreePtr->fileRefNum)->hfs_physical_block_size; + + /* Start with the allocation block size for regular files. */ + if (FTOC(filePtr)->c_fileid >= kHFSFirstUserCatalogNodeID) + { + nodeRec.blockSize = FCBTOVCB(filePtr)->blockSize; + } + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + // it is now safe to call M_ExitOnError (err) + + err = SetBTreeBlockSize (btreePtr->fileRefNum, nodeRec.blockSize, 1); + M_ExitOnError (err); + + + err = GetBTreeBlock(btreePtr->fileRefNum, + kHeaderNodeNum, + kGetBlock, + &nodeRec ); + if (err != noErr) + { + nodeRec.buffer = nil; + nodeRec.blockHeader = nil; + Panic("BTOpen: getNodeProc returned error getting header node."); + goto ErrorExit; + } + ++btreePtr->numGetNodes; + header = (BTHeaderRec*) ((uintptr_t)nodeRec.buffer + sizeof(BTNodeDescriptor)); + + + ///////////////////////////// verify header ///////////////////////////////// + + err = VerifyHeader (filePtr, header); + M_ExitOnError (err); + + + ///////////////////// Initalize fields from header ////////////////////////// + + PanicIf ( (FCBTOVCB(filePtr)->vcbSigWord != 0x4244) && (header->nodeSize == 512), " BTOpenPath: wrong node size for HFS+ volume!"); // 0x4244 = 'BD' + + btreePtr->treeDepth = header->treeDepth; + btreePtr->rootNode = header->rootNode; + btreePtr->leafRecords = header->leafRecords; + btreePtr->firstLeafNode = header->firstLeafNode; + btreePtr->lastLeafNode = header->lastLeafNode; + btreePtr->nodeSize = header->nodeSize; + btreePtr->maxKeyLength = header->maxKeyLength; + btreePtr->totalNodes = header->totalNodes; + btreePtr->freeNodes = header->freeNodes; + if (FTOC(filePtr)->c_fileid >= kHFSFirstUserCatalogNodeID) + filePtr->ff_clumpsize = header->clumpSize; + btreePtr->btreeType = header->btreeType; + + btreePtr->keyCompareType = header->keyCompareType; + + btreePtr->attributes = header->attributes; + + if ( btreePtr->maxKeyLength > 40 ) + btreePtr->attributes |= (kBTBigKeysMask + 
kBTVariableIndexKeysMask); //€€ we need a way to save these attributes + + /////////////////////// Initialize dynamic fields /////////////////////////// + + btreePtr->version = kBTreeVersion; + btreePtr->flags = 0; + btreePtr->writeCount = 1; + + /////////////////////////// Check Header Node /////////////////////////////// + + // set kBadClose attribute bit, and UpdateNode + + /* b-tree node size must be at least as big as the logical block size */ + if (btreePtr->nodeSize < VTOHFS(btreePtr->fileRefNum)->hfs_logical_block_size) + { + /* + * If this tree has any records or the media is writeable then + * we cannot mount using the current physical block size. + */ + if (btreePtr->leafRecords > 0 || + VTOHFS(btreePtr->fileRefNum)->hfs_flags & HFS_WRITEABLE_MEDIA) + { + err = fsBTBadNodeSize; + goto ErrorExit; + } + } + + /* + * If the actual node size is different than the amount we read, + * then release and trash this block, and re-read with the correct + * node size. + */ + if ( btreePtr->nodeSize != nodeRec.blockSize ) + { + err = SetBTreeBlockSize (btreePtr->fileRefNum, btreePtr->nodeSize, 32); + M_ExitOnError (err); + + /* + * Need to use kTrashBlock option to force the + * buffer cache to read the entire node + */ + err = ReleaseBTreeBlock(btreePtr->fileRefNum, &nodeRec, kTrashBlock); + ++btreePtr->numReleaseNodes; + M_ExitOnError (err); + + err = GetNode (btreePtr, kHeaderNodeNum, 0, &nodeRec ); + M_ExitOnError (err); + } + + //€€ total nodes * node size <= LEOF? + + + err = ReleaseNode (btreePtr, &nodeRec); + M_ExitOnError (err); + + /* + * Under Mac OS, b-tree nodes can be non-contiguous on disk when the + * allocation block size is smaller than the b-tree node size. + * + * If journaling is turned on for this volume we can't deal with this + * situation and so we bail out. If journaling isn't on it's ok as + * hfs_strategy_fragmented() deals with it. Journaling can't support + * this because it assumes that if you give it a block that it's + * contiguous on disk. + */ + if ( FCBTOHFS(filePtr)->jnl && !NodesAreContiguous(FCBTOVCB(filePtr), filePtr, btreePtr->nodeSize) ) { + return fsBTInvalidNodeErr; + } + + //////////////////////////////// Success //////////////////////////////////// + + //€€ align LEOF to multiple of node size? - just on close + + return noErr; + + + /////////////////////// Error - Clean up and Exit /////////////////////////// + +ErrorExit: + + filePtr->fcbBTCBPtr = nil; + (void) ReleaseNode (btreePtr, &nodeRec); + hfs_free(btreePtr, sizeof(*btreePtr)); + + return err; +} + + + +/*------------------------------------------------------------------------------- +Routine: BTClosePath - Flush BTree Header and Deallocate Memory for BTree. + +Function: Flush the BTreeControlBlock fields to header node, and delete BTree control + block and key descriptor associated with the file if filePtr is last + path of type kBTreeType ('btre'). + + +Input: filePtr - pointer to file to delete BTree control block for. 
+ +Result: noErr - success + fsBTInvalidFileErr - + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus BTClosePath (FCB *filePtr) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + if (btreePtr == nil) + return fsBTInvalidFileErr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + ////////////////////// Check for other BTree Paths ////////////////////////// + + btreePtr->attributes &= ~kBTBadCloseMask; // clear "bad close" attribute bit + err = UpdateHeader (btreePtr, true); + M_ExitOnError (err); + + hfs_free(btreePtr, sizeof(*btreePtr)); + filePtr->fcbBTCBPtr = nil; + + return noErr; + + /////////////////////// Error - Clean Up and Exit /////////////////////////// + +ErrorExit: + + return err; +} + + + +/*------------------------------------------------------------------------------- +Routine: BTSearchRecord - Search BTree for a record with a matching key. + +Function: Search for position in B*Tree indicated by searchKey. If a valid node hint + is provided, it will be searched first, then SearchTree will be called. + If a BTreeIterator is provided, it will be set to the position found as + a result of the search. If a record exists at that position, and a BufferDescriptor + is supplied, the record will be copied to the buffer (as much as will fit), + and recordLen will be set to the length of the record. + + If an error other than fsBTRecordNotFoundErr occurs, the BTreeIterator, if any, + is invalidated, and recordLen is set to 0. + + +Input: pathPtr - pointer to path for BTree file. + searchKey - pointer to search key to match. + hintPtr - pointer to hint (may be nil) + +Output: record - pointer to BufferDescriptor containing record + recordLen - length of data at recordPtr + iterator - pointer to BTreeIterator indicating position result of search + +Result: noErr - success, record contains copy of record found + fsBTRecordNotFoundErr - record was not found, no data copied + fsBTInvalidFileErr - no BTreeControlBlock is allocated for the fork + fsBTInvalidKeyLengthErr - + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus BTSearchRecord (FCB *filePtr, + BTreeIterator *searchIterator, + FSBufferDescriptor *record, + u_int16_t *recordLen, + BTreeIterator *resultIterator ) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + TreePathTable treePathTable; + u_int32_t nodeNum = 0; + BlockDescriptor node; + u_int16_t index = 0; + BTreeKeyPtr keyPtr = NULL; + RecordPtr recordPtr; + u_int16_t len; + Boolean foundRecord; + Boolean validHint; + + if (filePtr == nil) + { + return paramErr; + } + + if (searchIterator == nil) + { + return paramErr; + } + + node.buffer = nil; + node.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + { + return fsBTInvalidFileErr; + } + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + foundRecord = false; + + ////////////////////////////// Take A Hint ////////////////////////////////// + + err = IsItAHint (btreePtr, searchIterator, &validHint); + M_ExitOnError (err); + + if (validHint) + { + nodeNum = searchIterator->hint.nodeNum; + + err = GetNode (btreePtr, nodeNum, kGetNodeHint, &node); + if( err == noErr ) + { + if ( ((BTNodeDescriptor*) node.buffer)->kind == kBTLeafNode && + ((BTNodeDescriptor*) node.buffer)->numRecords > 0 ) + { + foundRecord = SearchNode (btreePtr, node.buffer, &searchIterator->key, &index); + + 
//€€ if !foundRecord, we could still skip tree search if ( 0 < index < numRecords ) + } + + if (foundRecord == false) + { + err = ReleaseNode (btreePtr, &node); + M_ExitOnError (err); + } + else + { + ++btreePtr->numValidHints; + } + } + + if( foundRecord == false ) + (void) BTInvalidateHint( searchIterator ); + } + + + //////////////////////////// Search The Tree //////////////////////////////// + + if (foundRecord == false) + { + err = SearchTree ( btreePtr, &searchIterator->key, treePathTable, &nodeNum, &node, &index); + switch (err) + { + case noErr: + foundRecord = true; + break; + case fsBTRecordNotFoundErr: + break; + default: + goto ErrorExit; + } + } + + + //////////////////////////// Get the Record ///////////////////////////////// + + if (foundRecord == true) + { + //XXX Should check for errors! Or BlockMove could choke on recordPtr!!! + GetRecordByIndex (btreePtr, node.buffer, index, &keyPtr, &recordPtr, &len); + + if (recordLen != nil) *recordLen = len; + + if (record != nil) + { + ByteCount recordSize; + + recordSize = record->itemCount * record->itemSize; + + if (len > recordSize) len = recordSize; + + BlockMoveData (recordPtr, record->bufferAddress, len); + } + } + + + /////////////////////// Success - Update Iterator /////////////////////////// + + if (resultIterator != nil) + { + if (foundRecord) { + resultIterator->hint.writeCount = btreePtr->writeCount; + resultIterator->hint.nodeNum = nodeNum; + resultIterator->hint.index = index; + } +#if DEBUG + resultIterator->hint.reserved1 = 0; + resultIterator->hint.reserved2 = 0; + resultIterator->version = 0; + resultIterator->reserved = 0; +#endif + // copy the key in the BTree when found rather than searchIterator->key to get proper case/diacriticals + if (foundRecord == true) + BlockMoveData ((Ptr)keyPtr, (Ptr)&resultIterator->key, CalcKeySize(btreePtr, keyPtr)); + else + BlockMoveData ((Ptr)&searchIterator->key, (Ptr)&resultIterator->key, CalcKeySize(btreePtr, &searchIterator->key)); + } + + err = ReleaseNode (btreePtr, &node); + M_ExitOnError (err); + + if (foundRecord == false) return fsBTRecordNotFoundErr; + else return noErr; + + + /////////////////////// Error - Clean Up and Exit /////////////////////////// + +ErrorExit: + + if (recordLen != nil) + *recordLen = 0; + + if (resultIterator != nil) + { + resultIterator->hint.writeCount = 0; + resultIterator->hint.nodeNum = 0; + resultIterator->hint.index = 0; + resultIterator->hint.reserved1 = 0; + resultIterator->hint.reserved2 = 0; + + resultIterator->version = 0; + resultIterator->reserved = 0; + resultIterator->key.length16 = 0; // zero out two bytes to cover both types of keys + } + + if ( err == fsBTEmptyErr ) + err = fsBTRecordNotFoundErr; + + return err; +} + + + +/*------------------------------------------------------------------------------- +Routine: BTIterateRecord - Find the first, next, previous, or last record. + +Function: Find the first, next, previous, or last record in the BTree + +Input: pathPtr - pointer to path iterate records for. 
+ operation - iteration operation (first,next,prev,last) + iterator - pointer to iterator indicating start position + +Output: iterator - iterator is updated to indicate new position + newKeyPtr - pointer to buffer to copy key found by iteration + record - pointer to buffer to copy record found by iteration + recordLen - length of record + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus BTIterateRecord (FCB *filePtr, + BTreeIterationOperation operation, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t *recordLen ) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + BTreeKeyPtr keyPtr; + RecordPtr recordPtr; + u_int16_t len; + + Boolean foundRecord; + u_int32_t nodeNum; + + BlockDescriptor left, node, right; + u_int16_t index; + + + ////////////////////////// Priliminary Checks /////////////////////////////// + + left.buffer = nil; + left.blockHeader = nil; + right.buffer = nil; + right.blockHeader = nil; + node.buffer = nil; + node.blockHeader = nil; + + + if (filePtr == nil) + { + return paramErr; + } + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + { + return fsBTInvalidFileErr; //€€ handle properly + } + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + if ((operation != kBTreeFirstRecord) && + (operation != kBTreeNextRecord) && + (operation != kBTreeCurrentRecord) && + (operation != kBTreePrevRecord) && + (operation != kBTreeLastRecord)) + { + err = fsInvalidIterationMovmentErr; + goto ErrorExit; + } + + /////////////////////// Find First or Last Record /////////////////////////// + + if ((operation == kBTreeFirstRecord) || (operation == kBTreeLastRecord)) + { + if (operation == kBTreeFirstRecord) nodeNum = btreePtr->firstLeafNode; + else nodeNum = btreePtr->lastLeafNode; + + if (nodeNum == 0) + { + err = fsBTEmptyErr; + goto ErrorExit; + } + + err = GetNode (btreePtr, nodeNum, 0, &node); + M_ExitOnError (err); + + if ( ((NodeDescPtr) node.buffer)->kind != kBTLeafNode || + ((NodeDescPtr) node.buffer)->numRecords <= 0 ) + { + err = ReleaseNode (btreePtr, &node); + M_ExitOnError (err); + + err = fsBTInvalidNodeErr; + printf ("hfs: BTIterateRecord() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); + goto ErrorExit; + } + + if (operation == kBTreeFirstRecord) index = 0; + else index = ((BTNodeDescriptor*) node.buffer)->numRecords - 1; + + goto CopyData; //€€ is there a cleaner way? + } + + + //////////////////////// Find Iterator Position ///////////////////////////// + + // Not called for (operation == kBTreeFirstRecord || operation == kBTreeLastRecord) + err = FindIteratorPosition (btreePtr, iterator, + &left, &node, &right, &nodeNum, &index, &foundRecord); + M_ExitOnError (err); + + + ///////////////////// Find Next Or Previous Record ////////////////////////// + + if (operation == kBTreePrevRecord) + { + if (index > 0) + { + --index; + } + else + { + if (left.buffer == nil) + { + nodeNum = ((NodeDescPtr) node.buffer)->bLink; + if ( nodeNum > 0) + { + // BTree nodes are always grabbed in left to right order. + // Therefore release the current node before looking up the + // left node. 
+ err = ReleaseNode(btreePtr, &node); + M_ExitOnError(err); + + // Look up the left node + err = GetNode (btreePtr, nodeNum, 0, &left); + M_ExitOnError (err); + + // Look up the current node again + err = GetRightSiblingNode (btreePtr, left.buffer, &node); + M_ExitOnError (err); + } else { + err = fsBTStartOfIterationErr; + goto ErrorExit; + } + } + // Before we stomp on "right", we'd better release it if needed + if (right.buffer != nil) { + err = ReleaseNode(btreePtr, &right); + M_ExitOnError(err); + } + right = node; + node = left; + left.buffer = nil; + index = ((NodeDescPtr) node.buffer)->numRecords -1; + } + } + else if (operation == kBTreeNextRecord) + { + if ((foundRecord != true) && + (((NodeDescPtr) node.buffer)->fLink == 0) && + (index == ((NodeDescPtr) node.buffer)->numRecords)) + { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + + // we did not find the record but the index is already positioned correctly + if ((foundRecord == false) && (index != ((NodeDescPtr) node.buffer)->numRecords)) + goto CopyData; + + // we found the record OR we have to look in the next node + if (index < ((NodeDescPtr) node.buffer)->numRecords -1) + { + ++index; + } + else + { + if (right.buffer == nil) + { + nodeNum = ((NodeDescPtr) node.buffer)->fLink; + if ( nodeNum > 0) + { + err = GetNode (btreePtr, nodeNum, 0, &right); + M_ExitOnError (err); + } else { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + } + // Before we stomp on "left", we'd better release it if needed + if (left.buffer != nil) { + err = ReleaseNode(btreePtr, &left); + M_ExitOnError(err); + } + left = node; + node = right; + right.buffer = nil; + index = 0; + } + } + else // operation == kBTreeCurrentRecord + { + // make sure we have something... + if ((foundRecord != true) && + (index >= ((NodeDescPtr) node.buffer)->numRecords)) + { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + } + + //////////////////// Copy Record And Update Iterator //////////////////////// + +CopyData: + + // added check for errors + err = GetRecordByIndex (btreePtr, node.buffer, index, &keyPtr, &recordPtr, &len); + M_ExitOnError (err); + + if (recordLen != nil) + *recordLen = len; + + if (record != nil) + { + ByteCount recordSize; + + recordSize = record->itemCount * record->itemSize; + + if (len > recordSize) len = recordSize; + + BlockMoveData (recordPtr, record->bufferAddress, len); + } + + if (iterator != nil) // first & last do not require iterator + { + iterator->hint.writeCount = btreePtr->writeCount; + iterator->hint.nodeNum = nodeNum; + iterator->hint.index = index; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + iterator->version = 0; + iterator->reserved = 0; + + /* SER + * Check for infinite loops by making sure we do not + * process more leaf records, than can possibly be (or the BTree header + * is seriously damaged)....a brute force method. 
+ */ + if ((operation == kBTreeFirstRecord) || (operation == kBTreeLastRecord)) + iterator->hitCount = 1; + else if (operation != kBTreeCurrentRecord) + iterator->hitCount += 1; + /* Always use the highest max, in case the grows while iterating */ + iterator->maxLeafRecs = max(btreePtr->leafRecords, iterator->maxLeafRecs); + +#if 0 + if (iterator->hitCount > iterator->maxLeafRecs + kNumLeafRecSlack) + { + err = fsBTInvalidNodeErr; + printf ("hfs: BTIterateRecord() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); + goto ErrorExit; + } +#endif + + BlockMoveData ((Ptr)keyPtr, (Ptr)&iterator->key, CalcKeySize(btreePtr, keyPtr)); + } + + + ///////////////////////////// Release Nodes ///////////////////////////////// + + err = ReleaseNode (btreePtr, &node); + M_ExitOnError (err); + + if (left.buffer != nil) + { + err = ReleaseNode (btreePtr, &left); + M_ExitOnError (err); + } + + if (right.buffer != nil) + { + err = ReleaseNode (btreePtr, &right); + M_ExitOnError (err); + } + + return noErr; + + /////////////////////// Error - Clean Up and Exit /////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &left); + (void) ReleaseNode (btreePtr, &node); + (void) ReleaseNode (btreePtr, &right); + + if (recordLen != nil) + *recordLen = 0; + + if (iterator != nil) + { + iterator->hint.writeCount = 0; + iterator->hint.nodeNum = 0; + iterator->hint.index = 0; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + iterator->version = 0; + iterator->reserved = 0; + iterator->key.length16 = 0; + } + + if ( err == fsBTEmptyErr || err == fsBTEndOfIterationErr ) + err = fsBTRecordNotFoundErr; + + return err; +} + + +/*------------------------------------------------------------------------------- +Routine: BTIterateRecords + +Function: Find a series of records + +Input: filePtr - b-tree file + operation - iteration operation (first,next,prev,last) + iterator - pointer to iterator indicating start position + callBackProc - pointer to routince to process a record + callBackState - pointer to state data (used by callBackProc) + +Output: iterator - iterator is updated to indicate new position + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus +BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator *iterator, + IterateCallBackProcPtr callBackProc, void * callBackState) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + BTreeKeyPtr keyPtr; + RecordPtr recordPtr; + u_int16_t len; + Boolean foundRecord; + u_int32_t nodeNum; + BlockDescriptor left, node, right; + u_int16_t index; + + + ////////////////////////// Priliminary Checks /////////////////////////////// + + left.buffer = nil; + left.blockHeader = nil; + right.buffer = nil; + right.blockHeader = nil; + node.buffer = nil; + node.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + if ((operation != kBTreeFirstRecord) && + (operation != kBTreeNextRecord) && + (operation != kBTreeCurrentRecord) && + (operation != kBTreePrevRecord) && + (operation != kBTreeLastRecord)) + { + err = fsInvalidIterationMovmentErr; + goto ErrorExit; + } + + /////////////////////// Find First or Last Record /////////////////////////// + + if ((operation == kBTreeFirstRecord) || (operation == kBTreeLastRecord)) + { + if (operation == kBTreeFirstRecord) + nodeNum = 
btreePtr->firstLeafNode; + else + nodeNum = btreePtr->lastLeafNode; + + if (nodeNum == 0) + { + err = fsBTEmptyErr; + goto ErrorExit; + } + + err = GetNode(btreePtr, nodeNum, 0, &node); + M_ExitOnError(err); + + if ( ((NodeDescPtr)node.buffer)->kind != kBTLeafNode || + ((NodeDescPtr)node.buffer)->numRecords <= 0 ) + { + err = ReleaseNode(btreePtr, &node); + M_ExitOnError(err); + + err = fsBTInvalidNodeErr; + printf ("hfs: BTIterateRecords() found invalid btree node on volume %s\n", FCBTOVCB(filePtr)->vcbVN); + hfs_mark_inconsistent(FCBTOVCB(filePtr), HFS_INCONSISTENCY_DETECTED); + goto ErrorExit; + } + + if (operation == kBTreeFirstRecord) + index = 0; + else + index = ((BTNodeDescriptor*) node.buffer)->numRecords - 1; + + goto ProcessData; + } + + //////////////////////// Find Iterator Position ///////////////////////////// + + // Not called for (operation == kBTreeFirstRecord || operation == kBTreeLastRecord) + err = FindIteratorPosition(btreePtr, iterator, &left, &node, &right, + &nodeNum, &index, &foundRecord); + if (err == fsBTRecordNotFoundErr) + err = 0; + M_ExitOnError(err); + + + ///////////////////// Find Next Or Previous Record ////////////////////////// + + if (operation == kBTreePrevRecord) + { + if (index > 0) + { + --index; + } + else + { + if (left.buffer == nil) + { + nodeNum = ((NodeDescPtr) node.buffer)->bLink; + if ( nodeNum > 0) + { + // BTree nodes are always grabbed in left to right order. + // Therefore release the current node before looking up the + // left node. + err = ReleaseNode(btreePtr, &node); + M_ExitOnError(err); + + // Look up the left node + err = GetNode (btreePtr, nodeNum, 0, &left); + M_ExitOnError (err); + + // Look up the current node again + err = GetRightSiblingNode (btreePtr, left.buffer, &node); + M_ExitOnError (err); + } else { + err = fsBTStartOfIterationErr; + goto ErrorExit; + } + } + // Before we stomp on "right", we'd better release it if needed + if (right.buffer != nil) { + err = ReleaseNode(btreePtr, &right); + M_ExitOnError(err); + } + right = node; + node = left; + left.buffer = nil; + index = ((NodeDescPtr) node.buffer)->numRecords -1; + } + } + else if (operation == kBTreeNextRecord) + { + if ((foundRecord != true) && + (((NodeDescPtr)node.buffer)->fLink == 0) && + (index == ((NodeDescPtr)node.buffer)->numRecords)) + { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + + // we did not find the record but the index is already positioned correctly + if ((foundRecord == false) && (index != ((NodeDescPtr)node.buffer)->numRecords)) + goto ProcessData; + + // we found the record OR we have to look in the next node + if (index < ((NodeDescPtr)node.buffer)->numRecords -1) + { + ++index; + } + else + { + if (right.buffer == nil) + { + nodeNum = ((NodeDescPtr)node.buffer)->fLink; + if ( nodeNum > 0) + { + err = GetNode(btreePtr, nodeNum, 0, &right); + M_ExitOnError(err); + } else { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + } + // Before we stomp on "left", we'd better release it if needed + if (left.buffer != nil) { + err = ReleaseNode(btreePtr, &left); + M_ExitOnError(err); + } + left = node; + node = right; + right.buffer = nil; + index = 0; + } + } + else // operation == kBTreeCurrentRecord + { + // make sure we have something... 
+ if ((foundRecord != true) && + (index >= ((NodeDescPtr)node.buffer)->numRecords)) + { + err = fsBTEndOfIterationErr; + goto ErrorExit; + } + } + + //////////////////// Process Records Using Callback //////////////////////// + +ProcessData: + err = GetRecordByIndex(btreePtr, node.buffer, index, &keyPtr, &recordPtr, &len); + if (err) { + err = btBadNode; + goto ErrorExit; + } + + while (err == 0) { + if (callBackProc(keyPtr, recordPtr, callBackState) == 0) + break; + + if ((index+1) < ((NodeDescPtr)node.buffer)->numRecords) { + ++index; + } else { + if (right.buffer == nil) + { + nodeNum = ((NodeDescPtr)node.buffer)->fLink; + if ( nodeNum > 0) + { + err = GetNode(btreePtr, nodeNum, 0, &right); + M_ExitOnError(err); + } else { + err = fsBTEndOfIterationErr; + break; + } + } + // Before we stomp on "left", we'd better release it if needed + if (left.buffer != nil) { + err = ReleaseNode(btreePtr, &left); + M_ExitOnError(err); + } + left = node; + node = right; + right.buffer = nil; + index = 0; + } + err = GetRecordByIndex(btreePtr, node.buffer, index, + &keyPtr, &recordPtr, &len); + if (err) { + err = btBadNode; + goto ErrorExit; + } + } + + + ///////////////// Update Iterator to Last Item Processed ///////////////////// + + + if (iterator != nil) // first & last have optional iterator + { + iterator->hint.writeCount = btreePtr->writeCount; + iterator->hint.nodeNum = nodeNum; + iterator->hint.index = index; + iterator->version = 0; + + BlockMoveData((Ptr)keyPtr, (Ptr)&iterator->key, CalcKeySize(btreePtr, keyPtr)); + } + M_ExitOnError(err); + + + ///////////////////////////// Release Nodes ///////////////////////////////// + + err = ReleaseNode(btreePtr, &node); + M_ExitOnError(err); + + if (left.buffer != nil) + { + err = ReleaseNode(btreePtr, &left); + M_ExitOnError(err); + } + + if (right.buffer != nil) + { + err = ReleaseNode(btreePtr, &right); + M_ExitOnError(err); + } + + return noErr; + + /////////////////////// Error - Clean Up and Exit /////////////////////////// + +ErrorExit: + + (void) ReleaseNode(btreePtr, &left); + (void) ReleaseNode(btreePtr, &node); + (void) ReleaseNode(btreePtr, &right); + + if (iterator != nil) + { + iterator->hint.writeCount = 0; + iterator->hint.nodeNum = 0; + iterator->hint.index = 0; + iterator->version = 0; + iterator->key.length16 = 0; + } + + if ( err == fsBTEmptyErr || err == fsBTEndOfIterationErr ) + err = fsBTRecordNotFoundErr; + + return err; +} + + +//////////////////////////////// BTInsertRecord ///////////////////////////////// + +OSStatus BTInsertRecord (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen ) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + TreePathTable treePathTable; + u_int32_t nodesNeeded; + BlockDescriptor nodeRec; + u_int32_t insertNodeNum; + u_int16_t index; + Boolean recordFit; + + ////////////////////////// Priliminary Checks /////////////////////////////// + + nodeRec.buffer = nil; // so we can call ReleaseNode + nodeRec.blockHeader = nil; + + err = CheckInsertParams (filePtr, iterator, record, recordLen); + if (err != noErr) + return err; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + + ///////////////////////// Find Insert Position ////////////////////////////// + + // always call SearchTree for Insert + err = SearchTree (btreePtr, &iterator->key, treePathTable, &insertNodeNum, &nodeRec, &index); + + switch (err) // set/replace/insert decision point + { + case noErr: err = fsBTDuplicateRecordErr; + goto 
ErrorExit; + + case fsBTRecordNotFoundErr: break; + + case fsBTEmptyErr: // if tree empty add 1st leaf node + + if (btreePtr->freeNodes == 0) + { + err = ExtendBTree (btreePtr, btreePtr->totalNodes + 1); + M_ExitOnError (err); + } + + err = AllocateNode (btreePtr, &insertNodeNum); + M_ExitOnError (err); + + err = GetNewNode (btreePtr, insertNodeNum, &nodeRec); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + ((NodeDescPtr)nodeRec.buffer)->kind = kBTLeafNode; + ((NodeDescPtr)nodeRec.buffer)->height = 1; + + recordFit = InsertKeyRecord (btreePtr, nodeRec.buffer, 0, + &iterator->key, KeyLength(btreePtr, &iterator->key), + record->bufferAddress, recordLen ); + if (recordFit != true) + { + err = fsBTRecordTooLargeErr; + goto ErrorExit; + } + + /* + * Update the B-tree control block. Do this before + * calling UpdateNode since it will compare the node's + * height with treeDepth. + */ + btreePtr->treeDepth = 1; + btreePtr->rootNode = insertNodeNum; + btreePtr->firstLeafNode = insertNodeNum; + btreePtr->lastLeafNode = insertNodeNum; + + err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction); + M_ExitOnError (err); + + M_BTreeHeaderDirty (btreePtr); + + goto Success; + + default: goto ErrorExit; + } + + if (index > 0) + { + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + recordFit = InsertKeyRecord (btreePtr, nodeRec.buffer, index, + &iterator->key, KeyLength(btreePtr, &iterator->key), + record->bufferAddress, recordLen); + if (recordFit == true) + { + err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction); + M_ExitOnError (err); + + goto Success; + } + } + + /////////////////////// Extend File If Necessary //////////////////////////// + + if ((btreePtr->treeDepth + 1UL) > btreePtr->freeNodes) + { + nodesNeeded = btreePtr->treeDepth + 1 + btreePtr->totalNodes - btreePtr->freeNodes; + if (nodesNeeded > CalcMapBits (btreePtr)) // we'll need to add a map node too! 
+ ++nodesNeeded; + + err = ExtendBTree (btreePtr, nodesNeeded); + M_ExitOnError (err); + } + + // no need to delete existing record + + err = InsertTree (btreePtr, treePathTable, &iterator->key, record->bufferAddress, + recordLen, &nodeRec, index, 1, kInsertRecord, &insertNodeNum); + M_ExitOnError (err); + + + //////////////////////////////// Success //////////////////////////////////// + +Success: + ++btreePtr->writeCount; + ++btreePtr->leafRecords; + M_BTreeHeaderDirty (btreePtr); + + // create hint + iterator->hint.writeCount = btreePtr->writeCount; + iterator->hint.nodeNum = insertNodeNum; + iterator->hint.index = 0; // unused + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &nodeRec); + + iterator->hint.writeCount = 0; + iterator->hint.nodeNum = 0; + iterator->hint.index = 0; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + if (err == fsBTEmptyErr) + err = fsBTRecordNotFoundErr; + + return err; +} + + +//////////////////////////////// BTReplaceRecord //////////////////////////////// + +OSStatus BTReplaceRecord (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen ) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + TreePathTable treePathTable; + u_int32_t nodesNeeded; + BlockDescriptor nodeRec; + u_int32_t insertNodeNum; + u_int16_t index; + Boolean recordFit; + Boolean validHint; + + + ////////////////////////// Priliminary Checks /////////////////////////////// + + nodeRec.buffer = nil; // so we can call ReleaseNode + nodeRec.blockHeader = nil; + + err = CheckInsertParams (filePtr, iterator, record, recordLen); + if (err != noErr) + return err; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + ////////////////////////////// Take A Hint ////////////////////////////////// + + err = IsItAHint (btreePtr, iterator, &validHint); + M_ExitOnError (err); + + if (validHint) + { + insertNodeNum = iterator->hint.nodeNum; + + err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec); + if( err == noErr ) + { + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + err = TrySimpleReplace (btreePtr, nodeRec.buffer, iterator, record, recordLen, &recordFit); + M_ExitOnError (err); + + if (recordFit) + { + err = UpdateNode (btreePtr, &nodeRec, 0, 0); + M_ExitOnError (err); + + ++btreePtr->numValidHints; + + goto Success; + } + else + { + (void) BTInvalidateHint( iterator ); + } + + err = ReleaseNode (btreePtr, &nodeRec); + M_ExitOnError (err); + } + else + { + (void) BTInvalidateHint( iterator ); + } + } + + + ////////////////////////////// Get A Clue /////////////////////////////////// + + err = SearchTree (btreePtr, &iterator->key, treePathTable, &insertNodeNum, &nodeRec, &index); + M_ExitOnError (err); // record must exit for Replace + + // optimization - if simple replace will work then don't extend btree + // €€ if we tried this before, and failed because it wouldn't fit then we shouldn't try this again... 
+ + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + err = TrySimpleReplace (btreePtr, nodeRec.buffer, iterator, record, recordLen, &recordFit); + M_ExitOnError (err); + + if (recordFit) + { + err = UpdateNode (btreePtr, &nodeRec, 0, 0); + M_ExitOnError (err); + + goto Success; + } + + + //////////////////////////// Make Some Room ///////////////////////////////// + + if ((btreePtr->treeDepth + 1UL) > btreePtr->freeNodes) + { + nodesNeeded = btreePtr->treeDepth + 1 + btreePtr->totalNodes - btreePtr->freeNodes; + if (nodesNeeded > CalcMapBits (btreePtr)) // we'll need to add a map node too! + ++nodesNeeded; + + err = ExtendBTree (btreePtr, nodesNeeded); + M_ExitOnError (err); + } + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + DeleteRecord (btreePtr, nodeRec.buffer, index); // delete existing key/record + + err = InsertTree (btreePtr, treePathTable, &iterator->key, record->bufferAddress, + recordLen, &nodeRec, index, 1, kReplaceRecord, &insertNodeNum); + M_ExitOnError (err); + + ++btreePtr->writeCount; /* writeCount changes only if the tree structure changed */ + +Success: + // create hint + iterator->hint.writeCount = btreePtr->writeCount; + iterator->hint.nodeNum = insertNodeNum; + iterator->hint.index = 0; // unused + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &nodeRec); + + iterator->hint.writeCount = 0; + iterator->hint.nodeNum = 0; + iterator->hint.index = 0; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + + return err; +} + + + +//////////////////////////////// BTUpdateRecord //////////////////////////////// + +OSStatus +BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator, + IterateCallBackProcPtr callBackProc, void * callBackState) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + TreePathTable treePathTable; + BlockDescriptor nodeRec; + RecordPtr recordPtr; + BTreeKeyPtr keyPtr; + u_int32_t insertNodeNum; + u_int16_t recordLen; + u_int16_t index; + Boolean validHint; + + + ////////////////////////// Priliminary Checks /////////////////////////////// + + nodeRec.buffer = nil; // so we can call ReleaseNode + nodeRec.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + ////////////////////////////// Take A Hint ////////////////////////////////// + + err = IsItAHint (btreePtr, iterator, &validHint); + M_ExitOnError (err); + + if (validHint) + { + insertNodeNum = iterator->hint.nodeNum; + + err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec); + if (err == noErr) + { + if (((NodeDescPtr)nodeRec.buffer)->kind == kBTLeafNode && + SearchNode (btreePtr, nodeRec.buffer, &iterator->key, &index)) + { + err = GetRecordByIndex(btreePtr, nodeRec.buffer, index, &keyPtr, &recordPtr, &recordLen); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + err = callBackProc(keyPtr, recordPtr, callBackState); + M_ExitOnError (err); + + err = UpdateNode (btreePtr, &nodeRec, 0, 0); + M_ExitOnError (err); + + ++btreePtr->numValidHints; + + goto Success; + } + else + { + (void) BTInvalidateHint( iterator ); + } + + err = ReleaseNode (btreePtr, &nodeRec); + M_ExitOnError (err); + } + else + { + (void) BTInvalidateHint( iterator ); + } + } + + ////////////////////////////// Get A Clue /////////////////////////////////// + + err = SearchTree (btreePtr, 
&iterator->key, treePathTable, &insertNodeNum, &nodeRec, &index); + M_ExitOnError (err); + + err = GetRecordByIndex(btreePtr, nodeRec.buffer, index, &keyPtr, &recordPtr, &recordLen); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); + + err = callBackProc(keyPtr, recordPtr, callBackState); + M_ExitOnError (err); + + err = UpdateNode (btreePtr, &nodeRec, 0, 0); + M_ExitOnError (err); + +Success: + // create hint + iterator->hint.writeCount = btreePtr->writeCount; + iterator->hint.nodeNum = insertNodeNum; + iterator->hint.index = 0; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + return noErr; + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &nodeRec); + + iterator->hint.writeCount = 0; + iterator->hint.nodeNum = 0; + iterator->hint.index = 0; + iterator->hint.reserved1 = 0; + iterator->hint.reserved2 = 0; + return err; +} + + + +//////////////////////////////// BTDeleteRecord ///////////////////////////////// + +OSStatus BTDeleteRecord (FCB *filePtr, + BTreeIterator *iterator ) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + TreePathTable treePathTable; + BlockDescriptor nodeRec; + u_int32_t nodesNeeded; + u_int32_t nodeNum; + u_int16_t index; + + + ////////////////////////// Priliminary Checks /////////////////////////////// + + nodeRec.buffer = nil; // so we can call ReleaseNode + nodeRec.blockHeader = nil; + + M_ReturnErrorIf (filePtr == nil, paramErr); + M_ReturnErrorIf (iterator == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + { + err = fsBTInvalidFileErr; + goto ErrorExit; + } + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + + /////////////////////////////// Find Key //////////////////////////////////// + + // check hint for simple delete case (index > 0, numRecords > 2) + + err = SearchTree (btreePtr, &iterator->key, treePathTable, &nodeNum, &nodeRec, &index); + M_ExitOnError (err); // record must exit for Delete + + + /////////////////////// Extend File If Necessary //////////////////////////// + + /* + * Worst case: we delete the first record in the tree and + * following key is sufficiently larger to cause all parents to + * require splitting and we need a new root node and a new map + * node. 
+ */ + if (index == 0 && btreePtr->treeDepth + 1 > btreePtr->freeNodes) + { + nodesNeeded = btreePtr->treeDepth + btreePtr->totalNodes; + if (nodesNeeded > CalcMapBits (btreePtr)) + ++nodesNeeded; + + if (nodesNeeded - btreePtr->totalNodes > btreePtr->freeNodes) { + err = ExtendBTree (btreePtr, nodesNeeded); + M_ExitOnError (err); + } + } + + ///////////////////////////// Delete Record ///////////////////////////////// + + err = DeleteTree (btreePtr, treePathTable, &nodeRec, index, 1); + M_ExitOnError (err); + + ++btreePtr->writeCount; + --btreePtr->leafRecords; + M_BTreeHeaderDirty (btreePtr); + + iterator->hint.nodeNum = 0; + + return noErr; + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + (void) ReleaseNode (btreePtr, &nodeRec); + + return err; +} + + + +OSStatus BTGetInformation (FCB *filePtr, + u_int16_t file_version, + BTreeInfoRec *info ) +{ +#pragma unused (file_version) + + BTreeControlBlockPtr btreePtr; + + + M_ReturnErrorIf (filePtr == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + /* + * XXX SER + * This should not require the whole tree to be locked, just maybe the BTreeControlBlockPtr + * + * REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + */ + + M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); + M_ReturnErrorIf (info == nil, paramErr); + + //€€ check version? + + info->nodeSize = btreePtr->nodeSize; + info->maxKeyLength = btreePtr->maxKeyLength; + info->treeDepth = btreePtr->treeDepth; + info->numRecords = btreePtr->leafRecords; + info->numNodes = btreePtr->totalNodes; + info->numFreeNodes = btreePtr->freeNodes; + info->lastfsync = btreePtr->lastfsync; + info->keyCompareType = btreePtr->keyCompareType; + return noErr; +} + +// XXXdbg +OSStatus +BTIsDirty(FCB *filePtr) +{ + BTreeControlBlockPtr btreePtr; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + return TreeIsDirty(btreePtr); +} + +/*------------------------------------------------------------------------------- +Routine: BTFlushPath - Flush BTreeControlBlock to Header Node. + +Function: Brief_description_of_the_function_and_any_side_effects + + +Input: pathPtr - pointer to path control block for B*Tree file to flush + +Output: none + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus BTFlushPath (FCB *filePtr) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + + + M_ReturnErrorIf (filePtr == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + err = UpdateHeader (btreePtr, false); + + return err; +} + + +/*------------------------------------------------------------------------------- +Routine: BTReload - Reload B-tree Header Data. + +Function: Reload B-tree header data from disk. This is called after fsck + has made repairs to the root filesystem. The filesystem is + mounted read-only when BTReload is caled. 
+ + +Input: filePtr - the B*Tree file that needs its header updated + +Output: none + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus +BTReloadData(FCB *filePtr) +{ + OSStatus err; + BTreeControlBlockPtr btreePtr; + BlockDescriptor node; + BTHeaderRec *header; + + + node.buffer = nil; + node.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + return (fsBTInvalidFileErr); + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); + if (err != noErr) + return (err); + + header = (BTHeaderRec*)((char *)node.buffer + sizeof(BTNodeDescriptor)); + if ((err = VerifyHeader (filePtr, header)) == 0) { + btreePtr->treeDepth = header->treeDepth; + btreePtr->rootNode = header->rootNode; + btreePtr->leafRecords = header->leafRecords; + btreePtr->firstLeafNode = header->firstLeafNode; + btreePtr->lastLeafNode = header->lastLeafNode; + btreePtr->maxKeyLength = header->maxKeyLength; + btreePtr->totalNodes = header->totalNodes; + btreePtr->freeNodes = header->freeNodes; + btreePtr->btreeType = header->btreeType; + + btreePtr->flags &= (~kBTHeaderDirty); + } + + (void) ReleaseNode(btreePtr, &node); + + return err; +} + + +/*------------------------------------------------------------------------------- +Routine: BTInvalidateHint - Invalidates the hint within a BTreeInterator. + +Function: Invalidates the hint within a BTreeInterator. + + +Input: iterator - pointer to BTreeIterator + +Output: iterator - iterator with the hint.nodeNum cleared + +Result: noErr - success + paramErr - iterator == nil +-------------------------------------------------------------------------------*/ + + +OSStatus BTInvalidateHint (BTreeIterator *iterator ) +{ + if (iterator == nil) + return paramErr; + + iterator->hint.nodeNum = 0; + + return noErr; +} + + + + +/*------------------------------------------------------------------------------- +Routine: BTGetLastSync + +Function: Returns the last time that this btree was flushed, does not include header. + +Input: filePtr - pointer file control block + +Output: lastfsync - time in seconds of last update + +Result: noErr - success + paramErr - iterator == nil +-------------------------------------------------------------------------------*/ + + +OSStatus BTGetLastSync (FCB *filePtr, + u_int32_t *lastsync) +{ + BTreeControlBlockPtr btreePtr; + + + M_ReturnErrorIf (filePtr == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + /* Maybe instead of requiring a lock..an atomic set might be more appropriate */ + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); + M_ReturnErrorIf (lastsync == nil, paramErr); + + *lastsync = btreePtr->lastfsync; + + return noErr; +} + + + + +/*------------------------------------------------------------------------------- +Routine: BTSetLastSync + +Function: Sets the last time that this btree was flushed, does not include header. 
+ + +Input: fcb - pointer file control block + +Output: lastfsync - time in seconds of last update + +Result: noErr - success + paramErr - iterator == nil +-------------------------------------------------------------------------------*/ + + +OSStatus BTSetLastSync (FCB *filePtr, + u_int32_t lastsync) +{ + BTreeControlBlockPtr btreePtr; + + + M_ReturnErrorIf (filePtr == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + /* Maybe instead of requiring a lock..an atomic set might be more appropriate */ + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); + M_ReturnErrorIf (lastsync == 0, paramErr); + + btreePtr->lastfsync = lastsync; + + return noErr; +} + +OSStatus BTHasContiguousNodes (FCB *filePtr) +{ + BTreeControlBlockPtr btreePtr; + + + M_ReturnErrorIf (filePtr == nil, paramErr); + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); + + M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); + + return NodesAreContiguous(FCBTOVCB(filePtr), filePtr, btreePtr->nodeSize); +} + + +/*------------------------------------------------------------------------------- +Routine: BTGetUserData + +Function: Read the user data area of the b-tree header node. + +-------------------------------------------------------------------------------*/ +OSStatus +BTGetUserData(FCB *filePtr, void * dataPtr, int dataSize) +{ + BTreeControlBlockPtr btreePtr; + BlockDescriptor node; + char * offset; + OSStatus err; + + if (dataSize > kBTreeHeaderUserBytes) + return (EINVAL); + node.buffer = nil; + node.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + return (fsBTInvalidFileErr); + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); + if (err) + return (err); + + offset = (char *)node.buffer + sizeof(BTNodeDescriptor) + sizeof(BTHeaderRec); + bcopy(offset, dataPtr, dataSize); + + (void) ReleaseNode(btreePtr, &node); + + return (0); +} + + +/*------------------------------------------------------------------------------- +Routine: BTSetUserData + +Function: Write the user data area of the b-tree header node. +-------------------------------------------------------------------------------*/ +OSStatus +BTSetUserData(FCB *filePtr, void * dataPtr, int dataSize) +{ + BTreeControlBlockPtr btreePtr; + BlockDescriptor node; + char * offset; + OSStatus err; + + if (dataSize > kBTreeHeaderUserBytes) + return (EINVAL); + node.buffer = nil; + node.blockHeader = nil; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) + return (fsBTInvalidFileErr); + + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + + err = GetNode(btreePtr, kHeaderNodeNum, 0, &node); + if (err) + return (err); + + ModifyBlockStart(btreePtr->fileRefNum, &node); + + offset = (char *)node.buffer + sizeof(BTNodeDescriptor) + sizeof(BTHeaderRec); + bcopy(dataPtr, offset, dataSize); + + err = UpdateNode (btreePtr, &node, 0, 0); + + return (err); +} + diff --git a/core/BTreeAllocate.c b/core/BTreeAllocate.c new file mode 100644 index 0000000..d9b3b63 --- /dev/null +++ b/core/BTreeAllocate.c @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2000-2003, 2005-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreeAllocate.c + + Contains: BTree Node Allocation routines for the BTree Module. + + Version: xxx put the technology version here xxx + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (djb) Don Brady + (ser) Scott Roberts + (msd) Mark Day + + Change History (most recent first): + + 6/1/99 djb Sync up with Mac OS 8.6. + 11/24/97 djb Remove some debug code (Panic calls). + 7/24/97 djb CallbackProcs now take refnum instead of an FCB. + 4/23/97 djb first checked in + + 2/19/97 djb Change E_BadNodeType to fsBTBadNodeType. + 12/19/96 djb first checked in + + History applicable to original Scarecrow Design: + + <4> 10/25/96 ser Changing for new VFPI + <3> 10/18/96 ser Converting over VFPI changes + <2> 1/10/96 msd Change 64-bit math to use real function names from Math64.i. + <1> 10/18/95 rst Moved from Scarecrow project. + + <8> 1/12/95 wjk Adopt Model FileSystem changes in D5. + <7> 9/30/94 prp Get in sync with D2 interface changes. + <6> 7/22/94 wjk Convert to the new set of header files. + <5> 8/31/93 prp Use U64SetU instead of S64Set. + <4> 5/21/93 gs Fix ExtendBTree bug. + <3> 5/10/93 gs Fix pointer arithmetic bug in AllocateNode. + <2> 3/23/93 gs finish ExtendBTree routine. + <1> 2/8/93 gs first checked in + <0> 1/1/93 gs begin AllocateNode and FreeNode + +*/ + +#include "hfs_btreeio.h" +#include "hfs_endian.h" +#include "BTreesPrivate.h" + +///////////////////// Routines Internal To BTreeAllocate.c ////////////////////// + +static OSStatus GetMapNode (BTreeControlBlockPtr btreePtr, + BlockDescriptor *nodePtr, + u_int16_t **mapPtr, + u_int16_t *mapSize ); + +///////////////////////////////////////////////////////////////////////////////// + +/*------------------------------------------------------------------------------- + +Routine: AllocateNode - Find Free Node, Mark It Used, and Return Node Number. + +Function: Searches the map records for the first free node, marks it "in use" and + returns the node number found. This routine should really only be called + when we know there are free blocks, otherwise it's just a waste of time. 
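+
+Example: (illustrative only) if, within the header node's map record, the
+ first word that is not 0xFFFF is word 2 and its big-endian value is
+ 0xFFBF, the scan below stops at bitOffset 6 (mask 0x0040), so the node
+ allocated is (2 << 4) + (15 - 6) = 41.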
+ +Note: We have to examine map nodes a word at a time rather than a long word + because the External BTree Mgr used map records that were not an integral + number of long words. Too bad. In our spare time could develop a more + sophisticated algorithm that read map records by long words (and long + word aligned) and handled the spare bytes at the beginning and end + appropriately. + +Input: btreePtr - pointer to control block for BTree file + +Output: nodeNum - number of node allocated + + +Result: noErr - success + fsBTNoMoreMapNodesErr - no free blocks were found + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus AllocateNode (BTreeControlBlockPtr btreePtr, u_int32_t *nodeNum) +{ + OSStatus err; + BlockDescriptor node; + u_int16_t *mapPtr, *pos; + u_int16_t mapSize, size; + u_int16_t freeWord; + u_int16_t mask; + u_int16_t bitOffset; + u_int32_t nodeNumber; + + + nodeNumber = 0; // first node number of header map record + node.buffer = nil; // clear node.buffer to get header node + // - and for ErrorExit + node.blockHeader = nil; + + while (true) + { + err = GetMapNode (btreePtr, &node, &mapPtr, &mapSize); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &node); + + //////////////////////// Find Word with Free Bit //////////////////////////// + + pos = mapPtr; + size = mapSize; + size >>= 1; // convert to number of words + //€€ assumes mapRecords contain an integral number of words + + while ( size-- ) + { + if ( *pos++ != 0xFFFF ) // assume test fails, and increment pos + break; + } + + --pos; // whoa! backup + + if (*pos != 0xFFFF) // hey, we got one! + break; + + nodeNumber += mapSize << 3; // covert to number of bits (nodes) + } + + ///////////////////////// Find Free Bit in Word ///////////////////////////// + + freeWord = SWAP_BE16 (*pos); + bitOffset = 15; + mask = 0x8000; + + do { + if ( (freeWord & mask) == 0) + break; + mask >>= 1; + } while (--bitOffset); + + ////////////////////// Calculate Free Node Number /////////////////////////// + + nodeNumber += ((pos - mapPtr) << 4) + (15 - bitOffset); // (pos-mapPtr) = # of words! + + + ///////////////////////// Check for End of Map ////////////////////////////// + + if (nodeNumber >= btreePtr->totalNodes) + { + err = fsBTFullErr; + goto ErrorExit; + } + + /////////////////////////// Allocate the Node /////////////////////////////// + + *pos |= SWAP_BE16 (mask); // set the map bit for the node + + err = UpdateNode (btreePtr, &node, 0, kLockTransaction); + M_ExitOnError (err); + + --btreePtr->freeNodes; + M_BTreeHeaderDirty(btreePtr); + + /* Account for allocations from node reserve */ + BTUpdateReserve(btreePtr, 1); + + *nodeNum = nodeNumber; + + return noErr; + +////////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &node); + *nodeNum = 0; + + return err; +} + + + +/*------------------------------------------------------------------------------- + +Routine: FreeNode - Clear allocation bit for node. + +Function: Finds the bit representing the node specified by nodeNum in the node + map and clears the bit. 
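+
+Example: (illustrative only) freeing node 41, relative to a map record whose
+ first bit represents node 0, clears mask 0x0040 in word 41 >> 4 = 2 of
+ the record, since bitOffset = 15 - (41 & 0xF) = 6.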
+ + +Input: btreePtr - pointer to control block for BTree file + nodeNum - number of node to mark free + +Output: none + +Result: noErr - success + fsBTNoMoreMapNodesErr - node number is beyond end of node map + != noErr - GetNode or ReleaseNode encountered some difficulty +-------------------------------------------------------------------------------*/ + +OSStatus FreeNode (BTreeControlBlockPtr btreePtr, u_int32_t nodeNum) +{ + OSStatus err; + BlockDescriptor node; + u_int32_t nodeIndex; + u_int16_t mapSize = 0; + u_int16_t *mapPos = NULL; + u_int16_t bitOffset; + + + //////////////////////////// Find Map Record //////////////////////////////// + nodeIndex = 0; // first node number of header map record + node.buffer = nil; // invalidate node.buffer to get header node + node.blockHeader = nil; + + while (nodeNum >= nodeIndex) + { + err = GetMapNode (btreePtr, &node, &mapPos, &mapSize); + M_ExitOnError (err); + + nodeIndex += mapSize << 3; // covert to number of bits (nodes) + } + + //////////////////////////// Mark Node Free ///////////////////////////////// + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &node); + + nodeNum -= (nodeIndex - (mapSize << 3)); // relative to this map record + bitOffset = 15 - (nodeNum & 0x0000000F); // last 4 bits are bit offset + mapPos += nodeNum >> 4; // point to word containing map bit + + M_SWAP_BE16_ClearBitNum (*mapPos, bitOffset); // clear it + + err = UpdateNode (btreePtr, &node, 0, kLockTransaction); + M_ExitOnError (err); + + ++btreePtr->freeNodes; + M_BTreeHeaderDirty(btreePtr); + + return noErr; + +ErrorExit: + + (void) ReleaseNode (btreePtr, &node); + + return err; +} + + + +/*------------------------------------------------------------------------------- + +Routine: ExtendBTree - Call FSAgent to extend file, and allocate necessary map nodes. + +Function: This routine calls the the FSAgent to extend the end of fork, if necessary, + to accomodate the number of nodes requested. It then allocates as many + map nodes as are necessary to account for all the nodes in the B*Tree. + If newTotalNodes is less than the current number of nodes, no action is + taken. + +Note: Internal HFS File Manager BTree Module counts on an integral number of + long words in map records, although they are not long word aligned. + +Input: btreePtr - pointer to control block for BTree file + newTotalNodes - total number of nodes the B*Tree is to extended to + +Output: none + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, + u_int32_t newTotalNodes ) +{ + OSStatus err; + FCB *filePtr; + FSSize minEOF, maxEOF; + u_int16_t nodeSize; + u_int32_t oldTotalNodes; + u_int32_t newMapNodes; + u_int32_t mapBits, totalMapBits; + u_int32_t recStartBit; + u_int32_t nodeNum, nextNodeNum; + u_int32_t firstNewMapNodeNum, lastNewMapNodeNum; + BlockDescriptor mapNode, newNode; + u_int16_t *mapPos; + u_int16_t *mapStart; + u_int16_t mapSize; + u_int16_t mapNodeRecSize; + u_int32_t bitInWord, bitInRecord; + u_int16_t mapIndex; + + + oldTotalNodes = btreePtr->totalNodes; + if (newTotalNodes <= oldTotalNodes) // we're done! 
+ return noErr; + + nodeSize = btreePtr->nodeSize; + filePtr = GetFileControlBlock(btreePtr->fileRefNum); + + mapNode.buffer = nil; + mapNode.blockHeader = nil; + newNode.buffer = nil; + newNode.blockHeader = nil; + + mapNodeRecSize = nodeSize - sizeof(BTNodeDescriptor) - 6; // 2 bytes of free space (see note) + + + //////////////////////// Count Bits In Node Map ///////////////////////////// + + totalMapBits = 0; + do { + err = GetMapNode (btreePtr, &mapNode, &mapStart, &mapSize); + M_ExitOnError (err); + + mapBits = mapSize << 3; // mapSize (in bytes) * 8 + recStartBit = totalMapBits; // bit number of first bit in map record + totalMapBits += mapBits; + + } while ( ((BTNodeDescriptor*)mapNode.buffer)->fLink != 0 ); + +#if DEBUG + if (totalMapBits != CalcMapBits (btreePtr)) + Panic ("ExtendBTree: totalMapBits != CalcMapBits"); +#endif + + /////////////////////// Extend LEOF If Necessary //////////////////////////// + + minEOF = (u_int64_t)newTotalNodes * (u_int64_t)nodeSize; + if ( (u_int64_t)filePtr->fcbEOF < minEOF ) + { + maxEOF = (u_int64_t)0x7fffffffLL * (u_int64_t)nodeSize; + + err = btreePtr->setEndOfForkProc (btreePtr->fileRefNum, minEOF, maxEOF); + M_ExitOnError (err); + } + + + //////////////////// Calc New Total Number Of Nodes ///////////////////////// + + newTotalNodes = filePtr->fcbEOF / nodeSize; // hack! + // do we wish to perform any verification of newTotalNodes at this point? + + btreePtr->totalNodes = newTotalNodes; // do we need to update freeNodes here too? + + + ////////////// Calculate Number Of New Map Nodes Required /////////////////// + + newMapNodes = 0; + if (newTotalNodes > totalMapBits) + { + newMapNodes = (((newTotalNodes - totalMapBits) >> 3) / mapNodeRecSize) + 1; + firstNewMapNodeNum = oldTotalNodes; + lastNewMapNodeNum = firstNewMapNodeNum + newMapNodes - 1; + } + else + { + err = ReleaseNode (btreePtr, &mapNode); + M_ExitOnError (err); + + goto Success; + } + + + /////////////////////// Initialize New Map Nodes //////////////////////////// + // XXXdbg - this is the correct place for this: + ModifyBlockStart(btreePtr->fileRefNum, &mapNode); + + ((BTNodeDescriptor*)mapNode.buffer)->fLink = firstNewMapNodeNum; + + nodeNum = firstNewMapNodeNum; + while (true) + { + err = GetNewNode (btreePtr, nodeNum, &newNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &newNode); + + ((NodeDescPtr)newNode.buffer)->numRecords = 1; + ((NodeDescPtr)newNode.buffer)->kind = kBTMapNode; + + // set free space offset + *(u_int16_t *)((Ptr)newNode.buffer + nodeSize - 4) = nodeSize - 6; + + if (nodeNum++ == lastNewMapNodeNum) + break; + + ((BTNodeDescriptor*)newNode.buffer)->fLink = nodeNum; // point to next map node + + err = UpdateNode (btreePtr, &newNode, 0, kLockTransaction); + M_ExitOnError (err); + } + + err = UpdateNode (btreePtr, &newNode, 0, kLockTransaction); + M_ExitOnError (err); + + + ///////////////////// Mark New Map Nodes Allocated ////////////////////////// + + nodeNum = firstNewMapNodeNum; + do { + bitInRecord = nodeNum - recStartBit; + + while (bitInRecord >= mapBits) + { + nextNodeNum = ((NodeDescPtr)mapNode.buffer)->fLink; + if ( nextNodeNum == 0) + { + err = fsBTNoMoreMapNodesErr; + goto ErrorExit; + } + + err = UpdateNode (btreePtr, &mapNode, 0, kLockTransaction); + M_ExitOnError (err); + + err = GetNode (btreePtr, nextNodeNum, 0, &mapNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &mapNode); + + mapIndex = 0; + + mapStart = (u_int16_t *) GetRecordAddress (btreePtr, mapNode.buffer, 
mapIndex); + mapSize = GetRecordSize (btreePtr, mapNode.buffer, mapIndex); + +#if DEBUG + if (mapSize != M_MapRecordSize (btreePtr->nodeSize) ) + { + Panic ("ExtendBTree: mapSize != M_MapRecordSize"); + } +#endif + + mapBits = mapSize << 3; // mapSize (in bytes) * 8 + recStartBit = totalMapBits; // bit number of first bit in map record + totalMapBits += mapBits; + + bitInRecord = nodeNum - recStartBit; + } + + mapPos = mapStart + ((nodeNum - recStartBit) >> 4); + bitInWord = 15 - ((nodeNum - recStartBit) & 0x0000000F); + + M_SWAP_BE16_SetBitNum (*mapPos, bitInWord); + + ++nodeNum; + + } while (nodeNum <= lastNewMapNodeNum); + + err = UpdateNode (btreePtr, &mapNode, 0, kLockTransaction); + M_ExitOnError (err); + + + //////////////////////////////// Success //////////////////////////////////// + +Success: + + btreePtr->totalNodes = newTotalNodes; + btreePtr->freeNodes += (newTotalNodes - oldTotalNodes) - newMapNodes; + + M_BTreeHeaderDirty(btreePtr); + + /* Force the b-tree header changes to disk */ + (void) UpdateHeader (btreePtr, true); + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, &mapNode); + (void) ReleaseNode (btreePtr, &newNode); + + return err; +} + + + +/*------------------------------------------------------------------------------- + +Routine: GetMapNode - Get the next map node and pointer to the map record. + +Function: Given a BlockDescriptor to a map node in nodePtr, GetMapNode releases + it and gets the next node. If nodePtr->buffer is nil, then the header + node is retrieved. + + +Input: btreePtr - pointer to control block for BTree file + nodePtr - pointer to a BlockDescriptor of a map node + +Output: nodePtr - pointer to the BlockDescriptor for the next map node + mapPtr - pointer to the map record within the map node + mapSize - number of bytes in the map record + +Result: noErr - success + fsBTNoMoreMapNodesErr - we've run out of map nodes + fsBTInvalidNodeErr - bad node, or not node type kMapNode + != noErr - failure +-------------------------------------------------------------------------------*/ + +static +OSStatus GetMapNode (BTreeControlBlockPtr btreePtr, + BlockDescriptor *nodePtr, + u_int16_t **mapPtr, + u_int16_t *mapSize ) +{ + OSStatus err; + u_int16_t mapIndex; + u_int32_t nextNodeNum; + + if (nodePtr->buffer != nil) // if iterator is valid... 
+ { + nextNodeNum = ((NodeDescPtr)nodePtr->buffer)->fLink; + if (nextNodeNum == 0) + { + err = fsBTNoMoreMapNodesErr; + goto ErrorExit; + } + + err = ReleaseNode (btreePtr, nodePtr); + M_ExitOnError (err); + + err = GetNode (btreePtr, nextNodeNum, 0, nodePtr); + M_ExitOnError (err); + + if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTMapNode) + { + err = fsBTBadNodeType; + goto ErrorExit; + } + + ++btreePtr->numMapNodesRead; + mapIndex = 0; + } else { + err = GetNode (btreePtr, kHeaderNodeNum, 0, nodePtr); + M_ExitOnError (err); + + if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTHeaderNode) + { + err = fsBTInvalidHeaderErr; //€€ or fsBTBadNodeType + goto ErrorExit; + } + + mapIndex = 2; + } + + + *mapPtr = (u_int16_t *) GetRecordAddress (btreePtr, nodePtr->buffer, mapIndex); + *mapSize = GetRecordSize (btreePtr, nodePtr->buffer, mapIndex); + + return noErr; + + +ErrorExit: + + (void) ReleaseNode (btreePtr, nodePtr); + + *mapPtr = nil; + *mapSize = 0; + + return err; +} + + + +////////////////////////////////// CalcMapBits ////////////////////////////////// + +u_int32_t CalcMapBits (BTreeControlBlockPtr btreePtr) +{ + u_int32_t mapBits; + + mapBits = M_HeaderMapRecordSize (btreePtr->nodeSize) << 3; + + while (mapBits < btreePtr->totalNodes) + mapBits += M_MapRecordSize (btreePtr->nodeSize) << 3; + + return mapBits; +} + + +/*------------------------------------------------------------------------------- +Routine: BTZeroUnusedNodes + +Function: Write zeros to all nodes in the B-tree that are not currently in use. +-------------------------------------------------------------------------------*/ +int +BTZeroUnusedNodes(FCB *filePtr) +{ + int err; + vnode_t vp; + BTreeControlBlockPtr btreePtr; + BlockDescriptor mapNode; + buf_t bp; + u_int32_t nodeNumber; + u_int16_t *mapPtr, *pos; + u_int16_t mapSize, size; + u_int16_t mask; + u_int16_t bitNumber; + u_int16_t word; + int numWritten; + + vp = FTOV(filePtr); + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + bp = NULL; + nodeNumber = 0; + mapNode.buffer = nil; + mapNode.blockHeader = nil; + numWritten = 0; + + /* Iterate over map nodes. */ + while (true) + { + err = GetMapNode (btreePtr, &mapNode, &mapPtr, &mapSize); + if (err) + { + err = MacToVFSError(err); + goto ErrorExit; + } + + pos = mapPtr; + size = mapSize; + size >>= 1; /* convert to number of 16-bit words */ + + /* Iterate over 16-bit words in the map record. */ + while (size--) + { + if (*pos != 0xFFFF) /* Anything free in this word? */ + { + word = SWAP_BE16(*pos); + + /* Iterate over bits in the word. */ + for (bitNumber = 0, mask = 0x8000; + bitNumber < 16; + ++bitNumber, mask >>= 1) + { + if (word & mask) + continue; /* This node is in use. */ + + if (nodeNumber + bitNumber >= btreePtr->totalNodes) + { + /* We've processed all of the nodes. */ + goto done; + } + + /* + * Get a buffer full of zeros and write it to the unused + * node. Since we'll probably be writing a lot of nodes, + * bypass the journal (to avoid a transaction that's too + * big). Instead, this behaves more like clearing out + * nodes when extending a B-tree (eg., ClearBTNodes). + */ + bp = buf_getblk(vp, nodeNumber + bitNumber, btreePtr->nodeSize, 0, 0, BLK_META); + if (bp == NULL) + { + printf("hfs: BTZeroUnusedNodes: unable to read node %u\n", nodeNumber + bitNumber); + err = EIO; + goto ErrorExit; + } + + if (buf_flags(bp) & B_LOCKED) { + /* + * This node is already part of a transaction and will be written when + * the transaction is committed, so don't write it here. 
If we did, then + * we'd hit a panic in hfs_vnop_bwrite because the B_LOCKED bit is still set. + */ + buf_brelse(bp); + continue; + } + + buf_clear(bp); + buf_markaged(bp); + + /* + * Try not to hog the buffer cache. Wait for the write + * every 32 nodes. If VNOP_BWRITE reports an error, bail out and bubble + * it up to the function calling us. If we tried to update a read-only + * mount on read-only media, for example, catching the error will let + * us alert the callers of this function that they should maintain + * the mount in read-only mode. + + */ + ++numWritten; + if (numWritten % 32 == 0) { + err = VNOP_BWRITE(bp); + if (err) { + goto ErrorExit; + } + } + else { + buf_bawrite(bp); + } + } + } + + /* Go to the next word in the bitmap */ + ++pos; + nodeNumber += 16; + } + } + +ErrorExit: +done: + (void) ReleaseNode(btreePtr, &mapNode); + + return err; +} diff --git a/core/BTreeMiscOps.c b/core/BTreeMiscOps.c new file mode 100644 index 0000000..a8682ef --- /dev/null +++ b/core/BTreeMiscOps.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2000-2003, 2005-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreeMiscOps.c + + Contains: Miscellaneous operations for the BTree Module. + + Version: xxx put the technology version here xxx + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (DSH) Deric Horn + (msd) Mark Day + (djb) Don Brady + + Change History (most recent first): + + 6/1/99 djb Sync up with Mac OS 8.6. + 9/4/97 djb Optimize TrySimpleReplace for the case where record size is not + changing. + 4/23/97 djb first checked in + + 3/31/97 djb Move ClearMemory to Utilities.c. + 3/17/97 DSH Casting for DFA + 2/27/97 msd Remove temporary fix from last revision. BTree EOF's should be + correct now, so check for strict equality. + 2/26/97 msd Fix a casting problem in ClearMemory. TEMPORARY FIX: Made + VerifyHeader more lenient, allowing the EOF to be greater than + the amount actually used by nodes; this should really be fixed + in the formatting code (which needs to compute the real BTree + sizes before writing the volume header). 
+ 2/19/97 djb Added ClearMemory. Changed CalcKeyLength to KeyLength. + 1/3/97 djb Added support for large keys. + 12/19/96 djb first checked in + + History applicable to original Scarecrow Design: + + <9> 10/25/96 ser Changing for new VFPI + <8> 10/18/96 ser Converting over VFPI changes + <7> 9/17/96 dkh More BTree statistics. Change IsItAHint to not always check to + see if the hint node is allocated. + <6> 9/16/96 dkh Revised BTree statistics. + <5> 6/20/96 dkh Radar #1358740. Change from using Pools to debug MemAllocators. + <4> 1/22/96 dkh Change Pools.i inclusion to PoolsPriv.i + <3> 1/10/96 msd Change 64-bit math to use real function names from Math64.i. + <2> 12/7/95 dkh D10E2 build. Changed usage of Ref data type to LogicalAddress. + <1> 10/18/95 rst Moved from Scarecrow project. + + <19> 4/26/95 prp In UpdateHeader, clear the dirty flag after the BTree is updated. + <18> 1/12/95 wjk Adopt Model FileSystem changes in D5. + <17> 11/16/94 prp Add IsItAHint routine and use it whenever hint's node number was + used for testing. + <16> 10/5/94 bk add pools.h include file + <15> 9/30/94 prp Get in sync with D2 interface changes. + <14> 7/22/94 wjk Convert to the new set of header files. + <13> 12/2/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <12> 11/30/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <11> 11/23/93 wjk Changes required to compile on the RS6000. + <10> 8/31/93 prp Use U64SetU instead of S64Set. + <9> 6/2/93 gs Update for changes to FSErrors.h and add some comments. + <8> 5/21/93 gs Modify UpdateHeader to write out attributes. Remove + Get/UpdateNode from TrySimpleReplace. + <7> 5/10/93 gs Add TrySimpleReplace routine. + <6> 3/23/93 gs Change MoveData to take void * instead of Ptr. Add UpdateHeader + and ClearBytes routines. + <5> 2/8/93 gs Add FindIteratorPosition. + <4> 12/10/92 gs Implement CheckKeyDescriptor and the KeyDescriptor interpreter. + <3> 12/8/92 gs Add GetKeyDescriptor, VerifyHeader, and Alloc/Dealloc memory + routines. + <2> 12/2/92 gs Add CompareKeys routine. + <1> 11/15/92 gs first checked in + +*/ + +#include "BTreesPrivate.h" +#include "hfs_btreeio.h" + + +////////////////////////////// Routine Definitions ////////////////////////////// + +/*------------------------------------------------------------------------------- +Routine: CalcKeyRecordSize - Return size of combined key/record structure. + +Function: Rounds keySize and recSize so they will end on word boundaries. + Does NOT add size of offset. + +Input: keySize - length of key (including length field) + recSize - length of record data + +Output: none + +Result: u_int16_t - size of combined key/record that will be inserted in btree +-------------------------------------------------------------------------------*/ + +u_int16_t CalcKeyRecordSize (u_int16_t keySize, + u_int16_t recSize ) +{ + if ( M_IsOdd (keySize) ) keySize += 1; // pad byte + + if (M_IsOdd (recSize) ) recSize += 1; // pad byte + + return (keySize + recSize); +} + + + +/*------------------------------------------------------------------------------- +Routine: VerifyHeader - Validate fields of the BTree header record. + +Function: Examines the fields of the BTree header record to determine if the + fork appears to contain a valid BTree. 
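+
+Note: The checks below require nodeSize to be a power of two between 512 and
+ 32768, the fork to hold at least totalNodes * nodeSize bytes, freeNodes and
+ every recorded node number to be less than totalNodes, treeDepth to be at
+ most kMaxTreeDepth, and btreeType to be a recognized type.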
+ +Input: forkPtr - pointer to fork control block + header - pointer to BTree header + + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus VerifyHeader (FCB *filePtr, + BTHeaderRec *header ) +{ + u_int64_t forkSize; + u_int32_t totalNodes; + + + switch (header->nodeSize) // node size == 512*2^n + { + case 512: + case 1024: + case 2048: + case 4096: + case 8192: + case 16384: + case 32768: break; + default: return fsBTInvalidHeaderErr; //€€ E_BadNodeType + } + + totalNodes = header->totalNodes; + + forkSize = (u_int64_t)totalNodes * (u_int64_t)header->nodeSize; + + if ( forkSize > (u_int64_t)filePtr->fcbEOF ) + return fsBTInvalidHeaderErr; + + if ( header->freeNodes >= totalNodes ) + return fsBTInvalidHeaderErr; + + if ( header->rootNode >= totalNodes ) + return fsBTInvalidHeaderErr; + + if ( header->firstLeafNode >= totalNodes ) + return fsBTInvalidHeaderErr; + + if ( header->lastLeafNode >= totalNodes ) + return fsBTInvalidHeaderErr; + + if ( header->treeDepth > kMaxTreeDepth ) + return fsBTInvalidHeaderErr; + + + /////////////////////////// Check BTree Type //////////////////////////////// + + switch (header->btreeType) + { + case 0: // HFS Type - no Key Descriptor + case kUserBTreeType: // with Key Descriptors etc. + case kReservedBTreeType: // Desktop Mgr BTree ? + break; + + default: return fsBTUnknownVersionErr; + } + + return noErr; +} + + + +OSStatus TreeIsDirty(BTreeControlBlockPtr btreePtr) +{ + return (btreePtr->flags & kBTHeaderDirty); +} + + + +/*------------------------------------------------------------------------------- +Routine: UpdateHeader - Write BTreeInfoRec fields to Header node. + +Function: Checks the kBTHeaderDirty flag in the BTreeInfoRec and updates the + header node if necessary. + +Input: btreePtr - pointer to BTreeInfoRec + + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus UpdateHeader(BTreeControlBlockPtr btreePtr, Boolean forceWrite) +{ + OSStatus err; + BlockDescriptor node; + BTHeaderRec *header; + u_int32_t options; + + if ((btreePtr->flags & kBTHeaderDirty) == 0) // btree info already flushed + return noErr; + + err = GetNode (btreePtr, kHeaderNodeNum, 0, &node ); + if (err != noErr) { + return err; + } + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &node); + + header = (BTHeaderRec*) ((char *)node.buffer + sizeof(BTNodeDescriptor)); + + header->treeDepth = btreePtr->treeDepth; + header->rootNode = btreePtr->rootNode; + header->leafRecords = btreePtr->leafRecords; + header->firstLeafNode = btreePtr->firstLeafNode; + header->lastLeafNode = btreePtr->lastLeafNode; + header->nodeSize = btreePtr->nodeSize; //€€ this shouldn't change + header->maxKeyLength = btreePtr->maxKeyLength; //€€ neither should this + header->totalNodes = btreePtr->totalNodes; + header->freeNodes = btreePtr->freeNodes; + header->btreeType = btreePtr->btreeType; + + // ignore header->clumpSize; //€€ rename this field? + + if (forceWrite) + options = kForceWriteBlock; + else + options = kLockTransaction; + + err = UpdateNode (btreePtr, &node, 0, options); + + btreePtr->flags &= (~kBTHeaderDirty); + + return err; +} + + + +/*------------------------------------------------------------------------------- +Routine: FindIteratorPosition - One_line_description. 
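+ (i.e. find the leaf node and record index at which the iterator's key is,
+ or would be, positioned; the hint node is used when still valid, otherwise
+ the tree is searched.)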
+ +Function: Brief_description_of_the_function_and_any_side_effects + +Algorithm: see FSC.BT.BTIterateRecord.PICT + +Note: //€€ document side-effects of bad node hints + +Input: btreePtr - description + iterator - description + + +Output: iterator - description + left - description + middle - description + right - description + nodeNum - description + returnIndex - description + foundRecord - description + + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus FindIteratorPosition (BTreeControlBlockPtr btreePtr, + BTreeIteratorPtr iterator, + BlockDescriptor *left, + BlockDescriptor *middle, + BlockDescriptor *right, + u_int32_t *returnNodeNum, + u_int16_t *returnIndex, + Boolean *foundRecord ) +{ + OSStatus err; + Boolean foundIt; + u_int32_t nodeNum; + u_int16_t leftIndex, index, rightIndex; + Boolean validHint; + + // assume btreePtr valid + // assume left, middle, right point to BlockDescriptors + // assume nodeNum points to u_int32_t + // assume index points to u_int16_t + // assume foundRecord points to Boolean + + left->buffer = nil; + left->blockHeader = nil; + middle->buffer = nil; + middle->blockHeader = nil; + right->buffer = nil; + right->blockHeader = nil; + + foundIt = false; + + if (iterator == nil) // do we have an iterator? + { + err = fsBTInvalidIteratorErr; + goto ErrorExit; + } + + err = IsItAHint (btreePtr, iterator, &validHint); + M_ExitOnError (err); + + nodeNum = iterator->hint.nodeNum; + if (! validHint) // does the hint appear to be valid? + { + goto SearchTheTree; + } + + err = GetNode (btreePtr, nodeNum, kGetNodeHint, middle); + if( err == fsBTInvalidNodeErr ) // returned if nodeNum is out of range + goto SearchTheTree; + + M_ExitOnError (err); + + if ( ((NodeDescPtr) middle->buffer)->kind != kBTLeafNode || + ((NodeDescPtr) middle->buffer)->numRecords <= 0 ) + { + goto SearchTheTree; + } + + foundIt = SearchNode (btreePtr, middle->buffer, &iterator->key, &index); + if (foundIt == true) + { + ++btreePtr->numValidHints; + goto SuccessfulExit; + } + iterator->hint.nodeNum = 0; + + if (index == 0) + { + if (((NodeDescPtr) middle->buffer)->bLink == 0) // before 1st btree record + { + goto SuccessfulExit; + } + + nodeNum = ((NodeDescPtr) middle->buffer)->bLink; + + // BTree nodes are always grabbed in left to right order. + // Therefore release the current node before looking up the + // left node. + err = ReleaseNode(btreePtr, middle); + M_ExitOnError(err); + + // Look up the left node + err = GetNode (btreePtr, nodeNum, 0, left); + M_ExitOnError (err); + + // Look up the current node again + err = GetRightSiblingNode (btreePtr, left->buffer, middle); + M_ExitOnError (err); + + if ( ((NodeDescPtr) left->buffer)->kind != kBTLeafNode || + ((NodeDescPtr) left->buffer)->numRecords <= 0 ) + { + goto SearchTheTree; + } + + foundIt = SearchNode (btreePtr, left->buffer, &iterator->key, &leftIndex); + if (foundIt == true) + { + *right = *middle; + *middle = *left; + left->buffer = nil; + index = leftIndex; + + goto SuccessfulExit; + } + + if (leftIndex == 0) // we're lost! + { + goto SearchTheTree; + } + else if (leftIndex >= ((NodeDescPtr) left->buffer)->numRecords) + { + nodeNum = ((NodeDescPtr) left->buffer)->fLink; + + PanicIf (index != 0, "FindIteratorPosition: index != 0"); //€€ just checking... 
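+ // The key sorts after every record in the left sibling, so it belongs at
+ // index 0 of the original (middle) node, which is the left node's fLink.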
+ goto SuccessfulExit; + } + else + { + *right = *middle; + *middle = *left; + left->buffer = nil; + index = leftIndex; + + goto SuccessfulExit; + } + } + else if (index >= ((NodeDescPtr) middle->buffer)->numRecords) + { + if (((NodeDescPtr) middle->buffer)->fLink == 0) // beyond last record + { + goto SuccessfulExit; + } + + nodeNum = ((NodeDescPtr) middle->buffer)->fLink; + + err = GetRightSiblingNode (btreePtr, middle->buffer, right); + M_ExitOnError (err); + + if ( ((NodeDescPtr) right->buffer)->kind != kBTLeafNode || + ((NodeDescPtr) right->buffer)->numRecords <= 0 ) + { + goto SearchTheTree; + } + + foundIt = SearchNode (btreePtr, right->buffer, &iterator->key, &rightIndex); + if (rightIndex >= ((NodeDescPtr) right->buffer)->numRecords) // we're lost + { + goto SearchTheTree; + } + else // we found it, or rightIndex==0, or rightIndexbuffer = nil; + index = rightIndex; + + goto SuccessfulExit; + } + } + + + //////////////////////////// Search The Tree //////////////////////////////// + +SearchTheTree: + { + TreePathTable treePathTable; // so we only use stack space if we need to + + err = ReleaseNode (btreePtr, left); M_ExitOnError (err); + err = ReleaseNode (btreePtr, middle); M_ExitOnError (err); + err = ReleaseNode (btreePtr, right); M_ExitOnError (err); + + err = SearchTree ( btreePtr, &iterator->key, treePathTable, &nodeNum, middle, &index); + switch (err) //€€ separate find condition from exceptions + { + case noErr: foundIt = true; break; + case fsBTRecordNotFoundErr: break; + default: goto ErrorExit; + } + } + + /////////////////////////////// Success! //////////////////////////////////// + +SuccessfulExit: + + *returnNodeNum = nodeNum; + *returnIndex = index; + *foundRecord = foundIt; + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + (void) ReleaseNode (btreePtr, left); + (void) ReleaseNode (btreePtr, middle); + (void) ReleaseNode (btreePtr, right); + + *returnNodeNum = 0; + *returnIndex = 0; + *foundRecord = false; + + return err; +} + + + +/////////////////////////////// CheckInsertParams /////////////////////////////// + +OSStatus CheckInsertParams (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen ) +{ + BTreeControlBlockPtr btreePtr; + + if (filePtr == nil) return paramErr; + + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + if (btreePtr == nil) return fsBTInvalidFileErr; + if (iterator == nil) return paramErr; + if (record == nil) return paramErr; + + // check total key/record size limit + if ( CalcKeyRecordSize (CalcKeySize(btreePtr, &iterator->key), recordLen) > (btreePtr->nodeSize >> 1)) + return fsBTRecordTooLargeErr; + + return noErr; +} + + + +/*------------------------------------------------------------------------------- +Routine: TrySimpleReplace - Attempts a simple insert, set, or replace. + +Function: If a hint exitst for the iterator, attempt to find the key in the hint + node. If the key is found, an insert operation fails. If the is not + found, a replace operation fails. If the key was not found, and the + insert position is greater than 0 and less than numRecords, the record + is inserted, provided there is enough freeSpace. If the key was found, + and there is more freeSpace than the difference between the new record + and the old record, the old record is deleted and the new record is + inserted. 
+ +Assumptions: iterator key has already been checked by CheckKey + + +Input: btreePtr - description + iterator - description + record - description + recordLen - description + operation - description + + +Output: recordInserted - description + + +Result: noErr - success + E_RecordExits - insert operation failure + != noErr - GetNode, ReleaseNode, UpdateNode returned an error +-------------------------------------------------------------------------------*/ + +OSStatus TrySimpleReplace (BTreeControlBlockPtr btreePtr, + NodeDescPtr nodePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen, + Boolean *recordInserted ) +{ + u_int32_t oldSpace; + u_int32_t spaceNeeded; + u_int16_t index; + u_int16_t keySize; + Boolean foundIt; + Boolean didItFit; + + + *recordInserted = false; // we'll assume this won't work... + + if ( nodePtr->kind != kBTLeafNode ) + return noErr; // we're in the weeds! + + foundIt = SearchNode (btreePtr, nodePtr, &iterator->key, &index); + + if ( foundIt == false ) + return noErr; // we might be lost... + + keySize = CalcKeySize(btreePtr, &iterator->key); // includes length field + + spaceNeeded = CalcKeyRecordSize (keySize, recordLen); + + oldSpace = GetRecordSize (btreePtr, nodePtr, index); + + if ( spaceNeeded == oldSpace ) + { + u_int8_t * dst; + + dst = GetRecordAddress (btreePtr, nodePtr, index); + + if ( M_IsOdd (keySize) ) + ++keySize; // add pad byte + + dst += keySize; // skip over key to point at record + + BlockMoveData(record->bufferAddress, dst, recordLen); // blast away... + + *recordInserted = true; + } + else if ( (GetNodeFreeSize(btreePtr, nodePtr) + oldSpace) >= spaceNeeded) + { + DeleteRecord (btreePtr, nodePtr, index); + + didItFit = InsertKeyRecord (btreePtr, nodePtr, index, + &iterator->key, KeyLength(btreePtr, &iterator->key), + record->bufferAddress, recordLen); + PanicIf (didItFit == false, "TrySimpleInsert: InsertKeyRecord returned false!"); + + *recordInserted = true; + } + // else not enough space... + + return noErr; +} + + +/*------------------------------------------------------------------------------- +Routine: IsItAHint - checks the hint within a BTreeInterator. + +Function: checks the hint within a BTreeInterator. If it is non-zero, it may + possibly be valid. + +Input: btreePtr - pointer to control block for BTree file + iterator - pointer to BTreeIterator + +Output: answer - true if the hint looks reasonable + - false if the hint is 0 + +Result: noErr - success +-------------------------------------------------------------------------------*/ + + +OSStatus IsItAHint (BTreeControlBlockPtr btreePtr, BTreeIterator *iterator, Boolean *answer) +{ + ++btreePtr->numHintChecks; + +#if DEBUG + if (iterator->hint.nodeNum >= btreePtr->totalNodes) + { + *answer = false; + } else + +#endif + if (iterator->hint.nodeNum == 0) + { + *answer = false; + } + else + { + *answer = true; + ++btreePtr->numPossibleHints; + } + + return noErr; +} diff --git a/core/BTreeNodeOps.c b/core/BTreeNodeOps.c new file mode 100644 index 0000000..9fee0b4 --- /dev/null +++ b/core/BTreeNodeOps.c @@ -0,0 +1,1036 @@ +/* + * Copyright (c) 2000, 2002, 2005-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreeNodeOps.c + + Contains: Single-node operations for the BTree Module. + + Version: xxx put the technology version here xxx + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (msd) Mark Day + (djb) Don Brady + + Change History (most recent first): + + 6/1/99 djb Sync up with Mac OS 8.6. + 4/113/99 djb Fix key size checking bug in CheckNode. + 3/19/99 djb Added key size checking to CheckNode. + 3/26/98 djb Added PrintNode for debugging. + 9/4/97 djb Removed GetRightSiblingNode and GetLeftSiblingNode - they are + now macros. SearchNode is now in BTreeSearchNode.a. + 8/22/97 djb Turn off debugging code in CheckKey. + 7/24/97 djb Add summary traces for Get/Rel Node. Made GetRecordOffset into a + macro. Only call CheckNode if the node came from disk. + 7/21/97 msd Make GetRecordByIndex check its record index input; it now + returns an OSStatus. + 4/23/97 djb first checked in + + 2/19/97 djb Changes to support big node cache. + 1/3/97 djb Added support for large keys. + 12/19/96 djb first checked in + + + History applicable to original Scarecrow Design: + + <6> 10/25/96 ser Changing for new VFPI + <5> 9/17/96 dkh Add bounds checking to GetNode. Update GetNode to not assert + that CheckNode failed if the node is all zeroes. This can happen + if the hint case if the fetched node has been deallocated + <4> 3/7/96 dkh Change GetNewNode() to not use kGetEmptyBlock. Instead use + kGetBlock to fetch a block from the disk itself. €€€ Why? + <3> 1/22/96 dkh Add #include Memory.h + <2> 1/10/96 msd Change 64-bit math to use real function names from Math64.i. + <1> 10/18/95 rst Moved from Scarecrow project. + + <17> 7/18/95 mbb Change MoveData & ClearBytes to BlockMoveData & BlockZero. + <16> 1/31/95 prp GetBlockProc interface uses a 64 bit node number. + <15> 1/12/95 wjk Adopt Model FileSystem changes in D5. + <14> 9/30/94 prp Get in sync with D2 interface changes. + <13> 7/25/94 wjk Eliminate usage of BytePtr in favor of UInt8 *. + <12> 7/22/94 wjk Convert to the new set of header files. + <11> 12/2/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <10> 11/30/93 wjk Change some Ptr's to BytePtr's in function definitions so they + agree with their prototypes. + <9> 8/31/93 prp Use U64SetU instead of S64Set. + <8> 5/21/93 gs Maintain statistical counters on Get/Release node routines. 
+ <7> 5/10/93 gs Change keySize parameter to keyLength for InsertKeyRecord + routine. Calculate number of bytes in key from keyLength to + account for length and pad bytes. Add GetChildNodeNum routine. + <6> 3/23/93 gs Add InsertKeyRecord routine. + <5> 2/8/93 gs Fix bug in SearchNode that caused "off by 1" error when final + compare was searchKey > trialKey. Add UpdateNode. + <4> 12/10/92 gs Change keyLength field of key to 'length'. + <3> 12/8/92 gs Incorporate suggestions from preliminary code review. + <2> 12/2/92 gs Implement routines. + <1> 11/15/92 gs Define routine interfaces. + +*/ + +#include "BTreesPrivate.h" + + + +///////////////////////// BTree Module Node Operations ////////////////////////// +// +// GetNode - Call FS Agent to get node +// GetNewNode - Call FS Agent to get a new node +// ReleaseNode - Call FS Agent to release node obtained by GetNode. +// UpdateNode - Mark a node as dirty and call FS Agent to release it. +// +// ClearNode - Clear a node to all zeroes. +// +// InsertRecord - Inserts a record into a BTree node. +// InsertKeyRecord - Inserts a key and record pair into a BTree node. +// DeleteRecord - Deletes a record from a BTree node. +// +// SearchNode - Return index for record that matches key. +// LocateRecord - Return pointer to key and data, and size of data. +// +// GetNodeDataSize - Return the amount of space used for data in the node. +// GetNodeFreeSize - Return the amount of free space in the node. +// +// GetRecordOffset - Return the offset for record "index". +// GetRecordAddress - Return address of record "index". +// GetOffsetAddress - Return address of offset for record "index". +// +// InsertOffset - Inserts a new offset into a node. +// DeleteOffset - Deletes an offset from a node. +// +///////////////////////////////////////////////////////////////////////////////// + + + +////////////////////// Routines Internal To BTreeNodeOps.c ////////////////////// + +u_int16_t GetRecordOffset (BTreeControlBlockPtr btree, + NodeDescPtr node, + u_int16_t index ); + +u_int16_t *GetOffsetAddress (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ); + +void InsertOffset (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + u_int16_t delta ); + +void DeleteOffset (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ); + + +///////////////////////////////////////////////////////////////////////////////// + +#define GetRecordOffset(btreePtr,node,index) (*(short *) ((u_int8_t *)(node) + (btreePtr)->nodeSize - ((index) << 1) - kOffsetSize)) + + +/*------------------------------------------------------------------------------- + +Routine: GetNode - Call FS Agent to get node + +Function: Gets an existing BTree node from FS Agent and verifies it. + +Input: btreePtr - pointer to BTree control block + nodeNum - number of node to request + +Output: nodePtr - pointer to beginning of node (nil if error) + +Result: + noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus GetNode (BTreeControlBlockPtr btreePtr, + u_int32_t nodeNum, + u_int32_t flags, + NodeRec *nodePtr ) +{ + OSStatus err; + GetBlockProcPtr getNodeProc; + u_int32_t options; + + + // is nodeNum within proper range? 
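+ // (valid node numbers run from 0, the header node, through totalNodes - 1)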
+ if( nodeNum >= btreePtr->totalNodes ) + { + Panic("GetNode:nodeNum >= totalNodes"); + err = fsBTInvalidNodeErr; + goto ErrorExit; + } + + nodePtr->blockSize = btreePtr->nodeSize; // indicate the size of a node + + options = kGetBlock; + if ( flags & kGetNodeHint ) + { + options |= kGetBlockHint; + } + + getNodeProc = btreePtr->getBlockProc; + err = getNodeProc (btreePtr->fileRefNum, + nodeNum, + options, + nodePtr ); + + if (err != noErr) + { + Panic ("GetNode: getNodeProc returned error."); + goto ErrorExit; + } + ++btreePtr->numGetNodes; + + return noErr; + +ErrorExit: + nodePtr->buffer = nil; + nodePtr->blockHeader = nil; + + return err; +} + + + +/*------------------------------------------------------------------------------- + +Routine: GetNewNode - Call FS Agent to get a new node + +Function: Gets a new BTree node from FS Agent and initializes it to an empty + state. + +Input: btreePtr - pointer to BTree control block + nodeNum - number of node to request + +Output: returnNodePtr - pointer to beginning of node (nil if error) + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus GetNewNode (BTreeControlBlockPtr btreePtr, + u_int32_t nodeNum, + NodeRec *returnNodePtr ) +{ + OSStatus err; + NodeDescPtr node; + void *pos; + GetBlockProcPtr getNodeProc; + + + //////////////////////// get buffer for new node //////////////////////////// + + returnNodePtr->blockSize = btreePtr->nodeSize; // indicate the size of a node + + getNodeProc = btreePtr->getBlockProc; + err = getNodeProc (btreePtr->fileRefNum, + nodeNum, + kGetBlock+kGetEmptyBlock, + returnNodePtr ); + + if (err != noErr) + { + Panic ("GetNewNode: getNodeProc returned error."); + // returnNodePtr->buffer = nil; + return err; + } + ++btreePtr->numGetNewNodes; + + + ////////////////////////// initialize the node ////////////////////////////// + + node = returnNodePtr->buffer; + + ClearNode (btreePtr, node); // clear the node + + pos = (char *)node + btreePtr->nodeSize - 2; // find address of last offset + *(u_int16_t *)pos = sizeof (BTNodeDescriptor); // set offset to beginning of free space + + + return noErr; +} + + + +/*------------------------------------------------------------------------------- + +Routine: ReleaseNode - Call FS Agent to release node obtained by GetNode. + +Function: Informs the FS Agent that a BTree node may be released. + +Input: btreePtr - pointer to BTree control block + nodeNum - number of node to release + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus ReleaseNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr ) +{ + OSStatus err; + ReleaseBlockProcPtr releaseNodeProc; + + + err = noErr; + + if (nodePtr->buffer != nil) + { + releaseNodeProc = btreePtr->releaseBlockProc; + err = releaseNodeProc (btreePtr->fileRefNum, + nodePtr, + kReleaseBlock ); + PanicIf (err, "ReleaseNode: releaseNodeProc returned error."); + ++btreePtr->numReleaseNodes; + } + + nodePtr->buffer = nil; + nodePtr->blockHeader = nil; + + return err; +} + + + + +/*------------------------------------------------------------------------------- + +Routine: TrashNode - Call FS Agent to release node obtained by GetNode, and + not store it...mark it as bad. + +Function: Informs the FS Agent that a BTree node may be released and thrown away. 
+ +Input: btreePtr - pointer to BTree control block + nodeNum - number of node to release + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus TrashNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr ) +{ + OSStatus err; + ReleaseBlockProcPtr releaseNodeProc; + + + err = noErr; + + if (nodePtr->buffer != nil) + { + releaseNodeProc = btreePtr->releaseBlockProc; + err = releaseNodeProc (btreePtr->fileRefNum, + nodePtr, + kReleaseBlock | kTrashBlock ); + PanicIf (err, "TrashNode: releaseNodeProc returned error."); + ++btreePtr->numReleaseNodes; + } + + nodePtr->buffer = nil; + nodePtr->blockHeader = nil; + + return err; +} + + + +/*------------------------------------------------------------------------------- + +Routine: UpdateNode - Mark a node as dirty and call FS Agent to release it. + +Function: Marks a BTree node dirty and informs the FS Agent that it may be released. + +Input: btreePtr - pointer to BTree control block + nodeNum - number of node to release + transactionID - ID of transaction this node update is a part of + flags - special flags to pass to ReleaseNodeProc + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus UpdateNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr, + u_int32_t transactionID, + u_int32_t flags ) +{ +#pragma unused(transactionID) + + OSStatus err; + ReleaseBlockProcPtr releaseNodeProc; + + + err = noErr; + + if (nodePtr->buffer != nil) // Why call UpdateNode if nil ?!? + { + releaseNodeProc = btreePtr->releaseBlockProc; + err = releaseNodeProc (btreePtr->fileRefNum, + nodePtr, + flags | kMarkBlockDirty ); + ++btreePtr->numUpdateNodes; + M_ExitOnError (err); + } + + nodePtr->buffer = nil; + nodePtr->blockHeader = nil; + + return noErr; + +ErrorExit: + + return err; +} + +/*------------------------------------------------------------------------------- + +Routine: ClearNode - Clear a node to all zeroes. + +Function: Writes zeroes from beginning of node for nodeSize bytes. + +Input: btreePtr - pointer to BTree control block + node - pointer to node to clear + +Result: none +-------------------------------------------------------------------------------*/ + +void ClearNode (BTreeControlBlockPtr btreePtr, NodeDescPtr node ) +{ + ClearMemory( node, btreePtr->nodeSize ); +} + +/*------------------------------------------------------------------------------- + +Routine: InsertRecord - Inserts a record into a BTree node. + +Function: + +Note: Record size must be even! + +Input: btreePtr - pointer to BTree control block + node - pointer to node to insert the record + index - position record is to be inserted + recPtr - pointer to record to insert + +Result: noErr - success + fsBTFullErr - record larger than remaining free space. +-------------------------------------------------------------------------------*/ + +Boolean InsertRecord (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + RecordPtr recPtr, + u_int16_t recSize ) +{ + u_int16_t freeSpace; + u_int16_t indexOffset; + u_int16_t freeOffset; + u_int16_t bytesToMove; + void *src; + void *dst; + + //// will new record fit in node? 
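+ // (the record needs recSize bytes of data space plus a 2-byte entry in the
+ // offset table at the end of the node, hence the "+ 2" below)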
+ + freeSpace = GetNodeFreeSize (btreePtr, node); + //€€ we could get freeOffset & calc freeSpace + if ( freeSpace < recSize + 2) + { + return false; + } + + + //// make hole for new record + + indexOffset = GetRecordOffset (btreePtr, node, index); + freeOffset = GetRecordOffset (btreePtr, node, node->numRecords); + + src = ((Ptr) node) + indexOffset; + dst = ((Ptr) src) + recSize; + bytesToMove = freeOffset - indexOffset; + if (bytesToMove) + MoveRecordsRight (src, dst, bytesToMove); + + + //// adjust offsets for moved records + + InsertOffset (btreePtr, node, index, recSize); + + + //// move in the new record + + dst = ((Ptr) node) + indexOffset; + MoveRecordsLeft (recPtr, dst, recSize); + + return true; +} + + + +/*------------------------------------------------------------------------------- + +Routine: InsertKeyRecord - Inserts a record into a BTree node. + +Function: + +Note: Record size must be even! + +Input: btreePtr - pointer to BTree control block + node - pointer to node to insert the record + index - position record is to be inserted + keyPtr - pointer to key for record to insert + keyLength - length of key (or maxKeyLength) + recPtr - pointer to record to insert + recSize - number of bytes to copy for record + +Result: noErr - success + fsBTFullErr - record larger than remaining free space. +-------------------------------------------------------------------------------*/ + +Boolean InsertKeyRecord (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + KeyPtr keyPtr, + u_int16_t keyLength, + RecordPtr recPtr, + u_int16_t recSize ) +{ + u_int16_t freeSpace; + u_int16_t indexOffset; + u_int16_t freeOffset; + u_int16_t bytesToMove; + u_int8_t * src; + u_int8_t * dst; + u_int16_t keySize; + u_int16_t rawKeyLength; + u_int16_t sizeOfLength; + + //// calculate actual key size + + if ( btreePtr->attributes & kBTBigKeysMask ) + keySize = keyLength + sizeof(u_int16_t); + else + keySize = keyLength + sizeof(u_int8_t); + + if ( M_IsOdd (keySize) ) + ++keySize; // add pad byte + + + //// will new record fit in node? + + freeSpace = GetNodeFreeSize (btreePtr, node); + //€€ we could get freeOffset & calc freeSpace + if ( freeSpace < keySize + recSize + 2) + { + return false; + } + + + //// make hole for new record + + indexOffset = GetRecordOffset (btreePtr, node, index); + freeOffset = GetRecordOffset (btreePtr, node, node->numRecords); + + src = ((u_int8_t *) node) + indexOffset; + dst = ((u_int8_t *) src) + keySize + recSize; + bytesToMove = freeOffset - indexOffset; + if (bytesToMove) + MoveRecordsRight (src, dst, bytesToMove); + + + //// adjust offsets for moved records + + InsertOffset (btreePtr, node, index, keySize + recSize); + + + //// copy record key + + dst = ((u_int8_t *) node) + indexOffset; + + if ( btreePtr->attributes & kBTBigKeysMask ) + { + *((u_int16_t *)dst) = keyLength; // use keyLength rather than key.length + dst = (u_int8_t *) (((u_int16_t *)dst) + 1); + rawKeyLength = keyPtr->length16; + sizeOfLength = 2; + } + else + { + *dst++ = keyLength; // use keyLength rather than key.length + rawKeyLength = keyPtr->length8; + sizeOfLength = 1; + } + + MoveRecordsLeft ( ((u_int8_t *) keyPtr) + sizeOfLength, dst, rawKeyLength); // copy key + + // any pad bytes? 
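+	// keySize includes the length field and any rounding up to an even size,
+	// and keyLength may exceed the key's stored length (e.g. maxKeyLength for
+	// an index key), so zero everything past the copied key bytes before the
+	// record data is moved in below.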
+ bytesToMove = keySize - rawKeyLength; + if (bytesToMove) + ClearMemory (dst + rawKeyLength, bytesToMove); // clear pad bytes in index key + + + //// copy record data + + dst = ((u_int8_t *) node) + indexOffset + keySize; + MoveRecordsLeft (recPtr, dst, recSize); + + return true; +} + + + +/*------------------------------------------------------------------------------- + +Routine: DeleteRecord - Deletes a record from a BTree node. + +Function: + +Input: btreePtr - pointer to BTree control block + node - pointer to node to insert the record + index - position record is to be inserted + +Result: none +-------------------------------------------------------------------------------*/ + +void DeleteRecord (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + int16_t indexOffset; + int16_t nextOffset; + int16_t freeOffset; + int16_t bytesToMove; + void *src; + void *dst; + + //// compress records + indexOffset = GetRecordOffset (btreePtr, node, index); + nextOffset = GetRecordOffset (btreePtr, node, index + 1); + freeOffset = GetRecordOffset (btreePtr, node, node->numRecords); + + src = ((Ptr) node) + nextOffset; + dst = ((Ptr) node) + indexOffset; + bytesToMove = freeOffset - nextOffset; + if (bytesToMove) + MoveRecordsLeft (src, dst, bytesToMove); + + //// Adjust the offsets + DeleteOffset (btreePtr, node, index); + + /* clear out new free space */ + bytesToMove = nextOffset - indexOffset; + ClearMemory(GetRecordAddress(btreePtr, node, node->numRecords), bytesToMove); + +} + + + +/*------------------------------------------------------------------------------- + +Routine: SearchNode - Return index for record that matches key. + +Function: Returns the record index for the record that matches the search key. + If no record was found that matches the search key, the "insert index" + of where the record should go is returned instead. + +Algorithm: A binary search algorithm is used to find the specified key. + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + searchKey - pointer to the key to match + +Output: index - pointer to beginning of key for record + +Result: true - success (index = record index) + false - key did not match anything in node (index = insert index) +-------------------------------------------------------------------------------*/ +Boolean +SearchNode( BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + KeyPtr searchKey, + u_int16_t *returnIndex ) +{ + int32_t lowerBound; + int32_t upperBound; + int32_t index; + int32_t result; + KeyPtr trialKey; + u_int16_t *offset; + KeyCompareProcPtr compareProc = btreePtr->keyCompareProc; + + lowerBound = 0; + upperBound = node->numRecords - 1; + offset = (u_int16_t *) ((u_int8_t *)(node) + (btreePtr)->nodeSize - kOffsetSize); + + while (lowerBound <= upperBound) { + index = (lowerBound + upperBound) >> 1; + + trialKey = (KeyPtr) ((u_int8_t *)node + *(offset - index)); + + result = compareProc(searchKey, trialKey); + + if (result < 0) { + upperBound = index - 1; /* search < trial */ + } else if (result > 0) { + lowerBound = index + 1; /* search > trial */ + } else { + *returnIndex = index; /* search == trial */ + return true; + } + } + + *returnIndex = lowerBound; /* lowerBound is insert index */ + return false; +} + + +/*------------------------------------------------------------------------------- + +Routine: GetRecordByIndex - Return pointer to key and data, and size of data. 
+ +Function: Returns a pointer to beginning of key for record, a pointer to the + beginning of the data for the record, and the size of the record data + (does not include the size of the key). + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - index of record to get + +Output: keyPtr - pointer to beginning of key for record + dataPtr - pointer to beginning of data for record + dataSize - size of the data portion of the record + +Result: none +-------------------------------------------------------------------------------*/ + +OSStatus GetRecordByIndex (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + KeyPtr *keyPtr, + u_int8_t * *dataPtr, + u_int16_t *dataSize ) +{ + u_int16_t offset; + u_int16_t nextOffset; + u_int16_t keySize; + + // + // Make sure index is valid (in range 0..numRecords-1) + // + if (index >= node->numRecords) + return fsBTRecordNotFoundErr; + + //// find keyPtr + offset = GetRecordOffset (btreePtr, node, index); + *keyPtr = (KeyPtr) ((Ptr)node + offset); + + //// find dataPtr + keySize = CalcKeySize(btreePtr, *keyPtr); + if ( M_IsOdd (keySize) ) + ++keySize; // add pad byte + + offset += keySize; // add the key length to find data offset + *dataPtr = (u_int8_t *) node + offset; + + //// find dataSize + nextOffset = GetRecordOffset (btreePtr, node, index + 1); + *dataSize = nextOffset - offset; + + return noErr; +} + + + +/*------------------------------------------------------------------------------- + +Routine: GetNodeDataSize - Return the amount of space used for data in the node. + +Function: Gets the size of the data currently contained in a node, excluding + the node header. (record data + offset overhead) + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + +Result: - number of bytes used for data and offsets in the node. +-------------------------------------------------------------------------------*/ + +u_int16_t GetNodeDataSize (BTreeControlBlockPtr btreePtr, NodeDescPtr node ) +{ + u_int16_t freeOffset; + + freeOffset = GetRecordOffset (btreePtr, node, node->numRecords); + + return freeOffset + (node->numRecords << 1) - sizeof (BTNodeDescriptor); +} + + + +/*------------------------------------------------------------------------------- + +Routine: GetNodeFreeSize - Return the amount of free space in the node. + +Function: + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + +Result: - number of bytes of free space in the node. +-------------------------------------------------------------------------------*/ + +u_int16_t GetNodeFreeSize (BTreeControlBlockPtr btreePtr, NodeDescPtr node ) +{ + u_int16_t freeOffset; + + freeOffset = GetRecordOffset (btreePtr, node, node->numRecords); //€€ inline? + + return btreePtr->nodeSize - freeOffset - (node->numRecords << 1) - kOffsetSize; +} + + + +/*------------------------------------------------------------------------------- + +Routine: GetRecordOffset - Return the offset for record "index". 
+ +Function: + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - record to obtain offset for + +Result: - offset (in bytes) from beginning of node of record specified by index +-------------------------------------------------------------------------------*/ +// make this a macro (for inlining) +#if 0 +u_int16_t GetRecordOffset (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + void *pos; + + + pos = (u_int8_t *)node + btreePtr->nodeSize - (index << 1) - kOffsetSize; + + return *(short *)pos; +} +#endif + + + +/*------------------------------------------------------------------------------- + +Routine: GetRecordAddress - Return address of record "index". + +Function: + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - record to obtain offset address for + +Result: - pointer to record "index". +-------------------------------------------------------------------------------*/ +// make this a macro (for inlining) +#if 0 +u_int8_t * GetRecordAddress (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + u_int8_t * pos; + + pos = (u_int8_t *)node + GetRecordOffset (btreePtr, node, index); + + return pos; +} +#endif + + + +/*------------------------------------------------------------------------------- + +Routine: GetRecordSize - Return size of record "index". + +Function: + +Note: This does not work on the FreeSpace index! + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - record to obtain record size for + +Result: - size of record "index". +-------------------------------------------------------------------------------*/ + +u_int16_t GetRecordSize (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + u_int16_t *pos; + + pos = (u_int16_t *) ((Ptr)node + btreePtr->nodeSize - (index << 1) - kOffsetSize); + + return *(pos-1) - *pos; +} + + + +/*------------------------------------------------------------------------------- +Routine: GetOffsetAddress - Return address of offset for record "index". + +Function: + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - record to obtain offset address for + +Result: - pointer to offset for record "index". +-------------------------------------------------------------------------------*/ + +u_int16_t *GetOffsetAddress (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + void *pos; + + pos = (Ptr)node + btreePtr->nodeSize - (index << 1) -2; + + return (u_int16_t *)pos; +} + + + +/*------------------------------------------------------------------------------- +Routine: GetChildNodeNum - Return child node number from index record "index". + +Function: Returns the first u_int32_t stored after the key for record "index". + +Assumes: The node is an Index Node. + The key.length stored at record "index" is ODD. //€€ change for variable length index keys + +Input: btreePtr - pointer to BTree control block + node - pointer to node that contains the record + index - record to obtain child node number from + +Result: - child node number from record "index". 
+-------------------------------------------------------------------------------*/ + +u_int32_t GetChildNodeNum (BTreeControlBlockPtr btreePtr, + NodeDescPtr nodePtr, + u_int16_t index ) +{ + u_int8_t * pos; + + pos = GetRecordAddress (btreePtr, nodePtr, index); + pos += CalcKeySize(btreePtr, (BTreeKey *) pos); // key.length + size of length field + + return *(u_int32_t *)pos; +} + + + +/*------------------------------------------------------------------------------- +Routine: InsertOffset - Add an offset and adjust existing offsets by delta. + +Function: Add an offset at 'index' by shifting 'index+1' through the last offset + and adjusting them by 'delta', the size of the record to be inserted. + The number of records contained in the node is also incremented. + +Input: btreePtr - pointer to BTree control block + node - pointer to node + index - index at which to insert record + delta - size of record to be inserted + +Result: none +-------------------------------------------------------------------------------*/ + +void InsertOffset (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + u_int16_t delta ) +{ + u_int16_t *src, *dst; + u_int16_t numOffsets; + + src = GetOffsetAddress (btreePtr, node, node->numRecords); // point to free offset + dst = src - 1; // point to new offset + numOffsets = node->numRecords++ - index; // subtract index & postincrement + + do { + *dst++ = *src++ + delta; // to tricky? + } while (numOffsets--); +} + + + +/*------------------------------------------------------------------------------- + +Routine: DeleteOffset - Delete an offset. + +Function: Delete the offset at 'index' by shifting 'index+1' through the last offset + and adjusting them by the size of the record 'index'. + The number of records contained in the node is also decremented. + +Input: btreePtr - pointer to BTree control block + node - pointer to node + index - index at which to delete record + +Result: none +-------------------------------------------------------------------------------*/ + +void DeleteOffset (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index ) +{ + u_int16_t *src, *dst; + u_int16_t numOffsets; + u_int16_t delta; + + dst = GetOffsetAddress (btreePtr, node, index); + src = dst - 1; + delta = *src - *dst; + numOffsets = --node->numRecords - index; // predecrement numRecords & subtract index + + while (numOffsets--) + { + *--dst = *--src - delta; // work our way left + } +} + + diff --git a/core/BTreeNodeReserve.c b/core/BTreeNodeReserve.c new file mode 100644 index 0000000..c75af1f --- /dev/null +++ b/core/BTreeNodeReserve.c @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include "BTreesPrivate.h" +#include "sys/malloc.h" +#include + + +/* + * B-tree Node Reserve + * + * BTReserveSpace + * BTReleaseReserve + * BTUpdateReserve + * + * Each kernel thread can have it's own reserve of b-tree + * nodes. This reserve info is kept in a hash table. + * + * Don't forget to call BTReleaseReserve when you're finished + * or you will leave stale node reserves in the hash. + */ + + +/* + * BE CAREFUL WHEN INCREASING THE SIZE OF THIS STRUCT! + * + * It must remain equal in size to the opaque cat_cookie_t + * struct (in hfs_catalog.h). + */ +struct nreserve { + LIST_ENTRY(nreserve) nr_hash; /* hash chain */ + int nr_nodecnt; /* count of nodes held in reserve */ + int nr_newnodes; /* nodes that were allocated */ + struct vnode *nr_btvp; /* b-tree file vnode */ + void *nr_tag; /* unique tag (per thread) */ +}; + +#define NR_GET_TAG() (current_thread()) + +#define NR_CACHE 17 + +#define NR_HASH(btvp, tag) \ + (&nr_hashtbl[((((intptr_t)(btvp)) >> 8) ^ ((intptr_t)(tag) >> 4)) & nr_hashmask]) + +LIST_HEAD(nodereserve, nreserve) *nr_hashtbl; + +u_long nr_hashmask; + +lck_grp_t * nr_lck_grp; +lck_grp_attr_t * nr_lck_grp_attr; +lck_attr_t * nr_lck_attr; + +lck_mtx_t nr_mutex; + +/* Internal Node Reserve Hash Routines (private) */ +static void nr_insert (struct vnode *, struct nreserve *nrp, int); +static void nr_delete (struct vnode *, struct nreserve *nrp, int *); +static void nr_update (struct vnode *, int); + + +/* + * BTReserveSetup - initialize the node reserve hash table + */ +void BTReserveSetup(void) +{ + if (sizeof(struct nreserve) != sizeof(cat_cookie_t)) + panic("hfs: BTReserveSetup: nreserve size != opaque struct size"); + + nr_hashtbl = hashinit(NR_CACHE, M_TEMP, &nr_hashmask); + + nr_lck_grp_attr= lck_grp_attr_alloc_init(); + nr_lck_grp = lck_grp_alloc_init("btree_node_reserve", nr_lck_grp_attr); + + nr_lck_attr = lck_attr_alloc_init(); + + lck_mtx_init(&nr_mutex, nr_lck_grp, nr_lck_attr); +} + + +/* + * BTReserveSpace - obtain a node reserve (for current thread) + * + * Used by the Catalog Layer (hfs_catalog.c) to reserve space. + * + * When data is NULL, we only insure that there's enough space + * but it is not reserved (assumes you keep the b-tree lock). + */ +int +BTReserveSpace(FCB *file, int operations, void* data) +{ + BTreeControlBlock *btree; + int rsrvNodes, availNodes, totalNodes; + int height; + int inserts, deletes; + u_int32_t clumpsize; + int err = 0; + + btree = (BTreeControlBlockPtr)file->fcbBTCBPtr; + clumpsize = file->ff_clumpsize; + + REQUIRE_FILE_LOCK(btree->fileRefNum, true); + + /* + * The node reserve is based on the number of b-tree + * operations (insert/deletes) and the height of the + * tree. + */ + height = btree->treeDepth; + if (height < 2) + height = 2; /* prevent underflow in rsrvNodes calculation */ + inserts = operations & 0xffff; + deletes = operations >> 16; + + /* + * Allow for at least one root split. + * + * Each delete operation can propogate a big key up the + * index. 
This can cause a split at each level up. + * + * Each insert operation can cause a local split and a + * split at each level up. + */ + rsrvNodes = 1 + (deletes * (height - 2)) + (inserts * (height - 1)); + + availNodes = btree->freeNodes - btree->reservedNodes; + + if (rsrvNodes > availNodes) { + u_int32_t reqblks, freeblks, rsrvblks; + uint32_t bt_rsrv; + struct hfsmount *hfsmp; + + /* + * For UNIX conformance, we try and reserve the MIN of either 5% of + * total file blocks or 10MB worth of blocks, for growing existing + * files. On non-HFS filesystems, creating a new directory entry may + * not cause additional disk space to be allocated, but on HFS, creating + * a new entry could cause the b-tree to grow. As a result, we take + * some precautions here to prevent that on configurations that try to + * satisfy conformance. + */ + hfsmp = VTOVCB(btree->fileRefNum); + rsrvblks = ((u_int64_t)hfsmp->allocLimit * 5) / 100; + if (hfsmp->blockSize > HFS_BT_MAXRESERVE) { + bt_rsrv = 1; + } + else { + bt_rsrv = (HFS_BT_MAXRESERVE / hfsmp->blockSize); + } + rsrvblks = MIN(rsrvblks, bt_rsrv); + + freeblks = hfs_freeblks(hfsmp, 0); + if (freeblks <= rsrvblks) { + /* When running low, disallow adding new items. */ + if ((inserts > 0) && (deletes == 0)) { + return (ENOSPC); + } + freeblks = 0; + } else { + freeblks -= rsrvblks; + } + reqblks = clumpsize / hfsmp->blockSize; + + if (reqblks > freeblks) { + reqblks = ((rsrvNodes - availNodes) * btree->nodeSize) / hfsmp->blockSize; + /* When running low, disallow adding new items. */ + if ((reqblks > freeblks) && (inserts > 0) && (deletes == 0)) { + return (ENOSPC); + } + file->ff_clumpsize = freeblks * hfsmp->blockSize; + } + totalNodes = rsrvNodes + btree->totalNodes - availNodes; + + /* See if we also need a map node */ + if (totalNodes > (int)CalcMapBits(btree)) { + ++totalNodes; + } + if ((err = ExtendBTree(btree, totalNodes))) { + goto out; + } + } + /* Save this reserve if this is a persistent request. */ + if (data) { + btree->reservedNodes += rsrvNodes; + nr_insert(btree->fileRefNum, (struct nreserve *)data, rsrvNodes); + } +out: + /* Put clump size back if it was changed. */ + if (file->ff_clumpsize != clumpsize) + file->ff_clumpsize = clumpsize; + + return (err); +} + + +/* + * BTReleaseReserve - release the node reserve held by current thread + * + * Used by the Catalog Layer (hfs_catalog.c) to relinquish reserved space. + */ +int +BTReleaseReserve(FCB *file, void* data) +{ + BTreeControlBlock *btree; + int nodecnt; + + btree = (BTreeControlBlockPtr)file->fcbBTCBPtr; + + REQUIRE_FILE_LOCK(btree->fileRefNum, true); + + nr_delete(btree->fileRefNum, (struct nreserve *)data, &nodecnt); + + if (nodecnt) + btree->reservedNodes -= nodecnt; + + return (0); +} + +/* + * BTUpdateReserve - update a node reserve for allocations that occurred. + */ +void +BTUpdateReserve(BTreeControlBlockPtr btreePtr, int nodes) +{ + nr_update(btreePtr->fileRefNum, nodes); +} + + +/*----------------------------------------------------------------------------*/ +/* Node Reserve Hash Functions (private) */ + + +int nrinserts = 0; +int nrdeletes = 0; + +/* + * Insert a new node reserve. 
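+ *
+ * If the current thread already holds a reserve for this b-tree vnode, the
+ * new count is folded into the existing entry and nrp->nr_tag is cleared,
+ * which later tells nr_delete that this cookie was never linked into the hash.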
+ */ +static void +nr_insert(struct vnode * btvp, struct nreserve *nrp, int nodecnt) +{ + struct nodereserve *nrhead; + struct nreserve *tmp_nrp; + void * tag = NR_GET_TAG(); + + /* + * Check the cache - there may already be a reserve + */ + lck_mtx_lock(&nr_mutex); + nrhead = NR_HASH(btvp, tag); + for (tmp_nrp = nrhead->lh_first; tmp_nrp; + tmp_nrp = tmp_nrp->nr_hash.le_next) { + if ((tmp_nrp->nr_tag == tag) && (tmp_nrp->nr_btvp == btvp)) { + nrp->nr_tag = 0; + tmp_nrp->nr_nodecnt += nodecnt; + lck_mtx_unlock(&nr_mutex); + return; + } + } + + nrp->nr_nodecnt = nodecnt; + nrp->nr_newnodes = 0; + nrp->nr_btvp = btvp; + nrp->nr_tag = tag; + LIST_INSERT_HEAD(nrhead, nrp, nr_hash); + ++nrinserts; + lck_mtx_unlock(&nr_mutex); +} + +/* + * Delete a node reserve. + */ +static void +nr_delete(struct vnode * btvp, struct nreserve *nrp, int *nodecnt) +{ + void * tag = NR_GET_TAG(); + + lck_mtx_lock(&nr_mutex); + if (nrp->nr_tag) { + if ((nrp->nr_tag != tag) || (nrp->nr_btvp != btvp)) + panic("hfs: nr_delete: invalid NR (%p)", nrp); + LIST_REMOVE(nrp, nr_hash); + *nodecnt = nrp->nr_nodecnt; + bzero(nrp, sizeof(struct nreserve)); + ++nrdeletes; + } else { + *nodecnt = 0; + } + lck_mtx_unlock(&nr_mutex); +} + + +/* + * Update a node reserve for any allocations that occurred. + */ +static void +nr_update(struct vnode * btvp, int nodecnt) +{ + struct nodereserve *nrhead; + struct nreserve *nrp; + void* tag = NR_GET_TAG(); + + lck_mtx_lock(&nr_mutex); + + nrhead = NR_HASH(btvp, tag); + for (nrp = nrhead->lh_first; nrp; nrp = nrp->nr_hash.le_next) { + if ((nrp->nr_tag == tag) && (nrp->nr_btvp == btvp)) { + nrp->nr_newnodes += nodecnt; + break; + } + } + lck_mtx_unlock(&nr_mutex); +} diff --git a/core/BTreeScanner.c b/core/BTreeScanner.c new file mode 100644 index 0000000..6ebf78a --- /dev/null +++ b/core/BTreeScanner.c @@ -0,0 +1,410 @@ +/* + * Copyright (c) 1996-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * + * @(#)BTreeScanner.c + */ +#include +#include "hfs_endian.h" + +#include "BTreeScanner.h" + +static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ); +static int ReadMultipleNodes( BTScanState *scanState ); + + +//_________________________________________________________________________________ +// +// Routine: BTScanNextRecord +// +// Purpose: Return the next leaf record in a scan. +// +// Inputs: +// scanState Scanner's current state +// avoidIO If true, don't do any I/O to refill the buffer +// +// Outputs: +// key Key of found record (points into buffer) +// data Data of found record (points into buffer) +// dataSize Size of data in found record +// +// Result: +// noErr Found a valid record +// btNotFound No more records +// ??? Needed to do I/O to get next node, but avoidIO set +// +// Notes: +// This routine returns pointers to the found record's key and data. It +// does not copy the key or data to a caller-supplied buffer (like +// GetBTreeRecord would). The caller must not modify the key or data. +//_________________________________________________________________________________ + +int BTScanNextRecord( BTScanState * scanState, + Boolean avoidIO, + void * * key, + void * * data, + u_int32_t * dataSize ) +{ + int err; + u_int16_t dataSizeShort; + + err = noErr; + + // + // If this is the first call, there won't be any nodes in the buffer, so go + // find the first first leaf node (if any). + // + if ( scanState->nodesLeftInBuffer == 0 ) + { + err = FindNextLeafNode( scanState, avoidIO ); + } + + while ( err == noErr ) + { + // See if we have a record in the current node + err = GetRecordByIndex( scanState->btcb, scanState->currentNodePtr, + scanState->recordNum, (KeyPtr *) key, + (u_int8_t **) data, &dataSizeShort ); + + if ( err == noErr ) + { + ++scanState->recordsFound; + ++scanState->recordNum; + if (dataSize != NULL) + *dataSize = dataSizeShort; + return noErr; + } + else if (err > 0) + { + // We didn't get the node through the cache, so we can't invalidate it. + //XXX Should we do something else to avoid seeing the same record again? + return err; + } + + // We're done with the current node. See if we've returned all the records + if ( scanState->recordsFound >= scanState->btcb->leafRecords ) + { + return btNotFound; + } + + // Move to the first record of the next leaf node + scanState->recordNum = 0; + err = FindNextLeafNode( scanState, avoidIO ); + } + + // + // If we got an EOF error from FindNextLeafNode, then there are no more leaf + // records to be found. + // + if ( err == fsEndOfIterationErr ) + err = btNotFound; + + return err; + +} /* BTScanNextRecord */ + + +//_________________________________________________________________________________ +// +// Routine: FindNextLeafNode +// +// Purpose: Point to the next leaf node in the buffer. Read more nodes +// into the buffer if needed (and allowed). +// +// Inputs: +// scanState Scanner's current state +// avoidIO If true, don't do any I/O to refill the buffer +// +// Result: +// noErr Found a valid record +// fsEndOfIterationErr No more nodes in file +// ??? 
Needed to do I/O to get next node, but avoidIO set +//_________________________________________________________________________________ + +static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ) +{ + int err; + BlockDescriptor block; + FileReference fref; + + err = noErr; // Assume everything will be OK + + while ( 1 ) + { + if ( scanState->nodesLeftInBuffer == 0 ) + { + // Time to read some more nodes into the buffer + if ( avoidIO ) + { + return fsBTTimeOutErr; + } + else + { + // read some more nodes into buffer + err = ReadMultipleNodes( scanState ); + if ( err != noErr ) + break; + } + } + else + { + // Adjust the node counters and point to the next node in the buffer + ++scanState->nodeNum; + --scanState->nodesLeftInBuffer; + + // If we've looked at all nodes in the tree, then we're done + if ( scanState->nodeNum >= scanState->btcb->totalNodes ) + return fsEndOfIterationErr; + + if ( scanState->nodesLeftInBuffer == 0 ) + { + scanState->recordNum = 0; + continue; + } + + scanState->currentNodePtr = (BTNodeDescriptor *)(((u_int8_t *)scanState->currentNodePtr) + + scanState->btcb->nodeSize); + } + + /* Fake a BlockDescriptor */ + block.blockHeader = NULL; /* No buffer cache buffer */ + block.buffer = scanState->currentNodePtr; + block.blockNum = scanState->nodeNum; + block.blockSize = scanState->btcb->nodeSize; + block.blockReadFromDisk = 1; + block.isModified = 0; + + fref = scanState->btcb->fileRefNum; + + /* This node was read from disk, so it must be swapped/checked. + * Since we are reading multiple nodes, we might have read an + * unused node. Therefore we allow swapping of unused nodes. + */ + err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost, true); + if ( err != noErr ) { + printf("hfs: FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum); + continue; + } + + if ( scanState->currentNodePtr->kind == kBTLeafNode ) + break; + } + + return err; + +} /* FindNextLeafNode */ + + +//_________________________________________________________________________________ +// +// Routine: ReadMultipleNodes +// +// Purpose: Read one or more nodes into the buffer. +// +// Inputs: +// theScanStatePtr Scanner's current state +// +// Result: +// noErr One or nodes were read +// fsEndOfIterationErr No nodes left in file, none in buffer +//_________________________________________________________________________________ + +static int ReadMultipleNodes( BTScanState *theScanStatePtr ) +{ + int myErr = E_NONE; + BTreeControlBlockPtr myBTreeCBPtr; + daddr64_t myPhyBlockNum; + u_int32_t myBufferSize; + struct vnode * myDevPtr; + unsigned int myBlockRun; + u_int32_t myBlocksInBufferCount; + + // release old buffer if we have one + if ( theScanStatePtr->bufferPtr != NULL ) + { + buf_markinvalid(theScanStatePtr->bufferPtr); + buf_brelse( theScanStatePtr->bufferPtr ); + theScanStatePtr->bufferPtr = NULL; + theScanStatePtr->currentNodePtr = NULL; + } + + myBTreeCBPtr = theScanStatePtr->btcb; + + // map logical block in catalog btree file to physical block on volume + myErr = hfs_bmap(myBTreeCBPtr->fileRefNum, theScanStatePtr->nodeNum, + &myDevPtr, &myPhyBlockNum, &myBlockRun); + if ( myErr != E_NONE ) + { + goto ExitThisRoutine; + } + + // bmap block run gives us the remaining number of valid blocks (number of blocks + // minus the first). so if there are 10 valid blocks our run number will be 9. 
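+	// for example (hypothetical numbers): a block run of 2 means 3 contiguous
+	// blocks are readable, so at most (2 + 1) * nodeSize bytes are read below,
+	// even if the scan buffer could hold more.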
+ // blocks, in our case is the same as nodes (both are 4K) + myBlocksInBufferCount = (theScanStatePtr->bufferSize / myBTreeCBPtr->nodeSize ); + myBufferSize = theScanStatePtr->bufferSize; + if ( (myBlockRun + 1) < myBlocksInBufferCount ) + { + myBufferSize = (myBlockRun + 1) * myBTreeCBPtr->nodeSize; + } + + // now read blocks from the device + myErr = (int)buf_meta_bread(myDevPtr, + myPhyBlockNum, + myBufferSize, + NOCRED, + &theScanStatePtr->bufferPtr ); + if ( myErr != E_NONE ) + { + goto ExitThisRoutine; + } + + theScanStatePtr->nodesLeftInBuffer = buf_count(theScanStatePtr->bufferPtr) / theScanStatePtr->btcb->nodeSize; + theScanStatePtr->currentNodePtr = (BTNodeDescriptor *) buf_dataptr(theScanStatePtr->bufferPtr); + +ExitThisRoutine: + return myErr; + +} /* ReadMultipleNodes */ + + + +//_________________________________________________________________________________ +// +// Routine: BTScanInitialize +// +// Purpose: Prepare to start a new BTree scan, or resume a previous one. +// +// Inputs: +// btreeFile The B-Tree's file control block +// startingNode Initial node number +// startingRecord Initial record number within node +// recordsFound Number of valid records found so far +// bufferSize Size (in bytes) of buffer +// +// Outputs: +// scanState Scanner's current state; pass to other scanner calls +// +// Notes: +// To begin a new scan and see all records in the B-Tree, pass zeroes for +// startingNode, startingRecord, and recordsFound. +// +// To resume a scan from the point of a previous BTScanTerminate, use the +// values returned by BTScanTerminate as input for startingNode, startingRecord, +// and recordsFound. +// +// When resuming a scan, the caller should check the B-tree's write count. If +// it is different from the write count when the scan was terminated, then the +// tree may have changed and the current state may be incorrect. In particular, +// you may see some records more than once, or never see some records. Also, +// the scanner may not be able to detect when all leaf records have been seen, +// and will have to scan through many empty nodes. +// +// XXXÊPerhaps the write count should be managed by BTScanInitialize and +// XXX BTScanTerminate? This would avoid the caller having to peek at +// XXX internal B-Tree structures. 
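+//
+// A minimal usage sketch (illustrative only; error handling and the caller's
+// declarations of btreeFCB, nextNode, nextRecord and found are assumed):
+//
+//		BTScanState	scanState;
+//		void		*key, *data;
+//		u_int32_t	dataSize;
+//
+//		if ( BTScanInitialize( btreeFCB, 0, 0, 0, kCatSearchBufferSize, &scanState ) == noErr )
+//		{
+//			while ( BTScanNextRecord( &scanState, false, &key, &data, &dataSize ) == noErr )
+//			{
+//				// key/data point into the scan buffer and must not be modified
+//			}
+//			(void) BTScanTerminate( &scanState, &nextNode, &nextRecord, &found );
+//		}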
+//_________________________________________________________________________________ + +int BTScanInitialize( const FCB * btreeFile, + u_int32_t startingNode, + u_int32_t startingRecord, + u_int32_t recordsFound, + u_int32_t bufferSize, + BTScanState * scanState ) +{ + BTreeControlBlock *btcb; + + // + // Make sure this is a valid B-Tree file + // + btcb = (BTreeControlBlock *) btreeFile->fcbBTCBPtr; + if (btcb == NULL) + return fsBTInvalidFileErr; + + // + // Make sure buffer size is big enough, and a multiple of the + // B-Tree node size + // + if ( bufferSize < btcb->nodeSize ) + return paramErr; + bufferSize = (bufferSize / btcb->nodeSize) * btcb->nodeSize; + + // + // Set up the scanner's state + // + scanState->bufferSize = bufferSize; + scanState->bufferPtr = NULL; + scanState->btcb = btcb; + scanState->nodeNum = startingNode; + scanState->recordNum = startingRecord; + scanState->currentNodePtr = NULL; + scanState->nodesLeftInBuffer = 0; // no nodes currently in buffer + scanState->recordsFound = recordsFound; + microuptime(&scanState->startTime); // initialize our throttle + + return noErr; + +} /* BTScanInitialize */ + + +//_________________________________________________________________________________ +// +// Routine: BTScanTerminate +// +// Purpose: Return state information about a scan so that it can be resumed +// later via BTScanInitialize. +// +// Inputs: +// scanState Scanner's current state +// +// Outputs: +// nextNode Node number to resume a scan (pass to BTScanInitialize) +// nextRecord Record number to resume a scan (pass to BTScanInitialize) +// recordsFound Valid records seen so far (pass to BTScanInitialize) +//_________________________________________________________________________________ + +int BTScanTerminate( BTScanState * scanState, + u_int32_t * startingNode, + u_int32_t * startingRecord, + u_int32_t * recordsFound ) +{ + *startingNode = scanState->nodeNum; + *startingRecord = scanState->recordNum; + *recordsFound = scanState->recordsFound; + + if ( scanState->bufferPtr != NULL ) + { + buf_markinvalid(scanState->bufferPtr); + buf_brelse( scanState->bufferPtr ); + scanState->bufferPtr = NULL; + scanState->currentNodePtr = NULL; + } + + return noErr; + +} /* BTScanTerminate */ + + diff --git a/core/BTreeScanner.h b/core/BTreeScanner.h new file mode 100644 index 0000000..05a1043 --- /dev/null +++ b/core/BTreeScanner.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 1996-2004 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * + * @(#)BTreeScanner.h + */ + +#ifndef _BTREESCANNER_H_ +#define _BTREESCANNER_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include + +#include "FileMgrInternal.h" +#include "BTreesPrivate.h" + +// amount of time we are allowed to process a catalog search (in µ secs) +// NOTE - code assumes kMaxMicroSecsInKernel is less than 1,000,000 +enum { kMaxMicroSecsInKernel = (1000 * 100) }; // 1 tenth of a second + +// btree node scanner buffer size. at 32K we get 8 nodes. this is the size used +// in Mac OS 9 +enum { kCatSearchBufferSize = (32 * 1024) }; + + +/* + * ============ W A R N I N G ! ============ + * DO NOT INCREASE THE SIZE OF THIS STRUCT! + * It must be less than or equal to the size of + * the opaque searchstate struct (in sys/attr.h). + */ +/* Private description used in hfs_search */ +struct CatPosition +{ + u_int32_t writeCount; /* The BTree's write count (to see if the catalog writeCount */ + /* changed since the last search). If 0, the rest */ + /* of the record is invalid, start from beginning. */ + u_int32_t nextNode; /* node number to resume search */ + u_int32_t nextRecord; /* record number to resume search */ + u_int32_t recordsFound; /* number of leaf records seen so far */ +}; +typedef struct CatPosition CatPosition; + + +/* + BTScanState - This structure is used to keep track of the current state + of a BTree scan. It contains both the dynamic state information (like + the current node number and record number) and information that is static + for the duration of a scan (such as buffer pointers). + + NOTE: recordNum may equal or exceed the number of records in the node + number nodeNum. If so, then the next attempt to get a record will move + to a new node number. +*/ +struct BTScanState +{ + // The following fields are set up once at initialization time. + // They are not changed during a scan. + u_int32_t bufferSize; + struct buf * bufferPtr; + BTreeControlBlock * btcb; + + // The following fields are the dynamic state of the current scan. 
+ u_int32_t nodeNum; // zero is first node + u_int32_t recordNum; // zero is first record + BTNodeDescriptor * currentNodePtr; // points to current node within buffer + u_int32_t nodesLeftInBuffer; // number of valid nodes still in the buffer + u_int32_t recordsFound; // number of leaf records seen so far + struct timeval startTime; // time we started catalog search +}; +typedef struct BTScanState BTScanState; + + +/* *********************** PROTOTYPES *********************** */ + +int BTScanInitialize( const FCB * btreeFile, + u_int32_t startingNode, + u_int32_t startingRecord, + u_int32_t recordsFound, + u_int32_t bufferSize, + BTScanState * scanState ); + +int BTScanNextRecord( BTScanState * scanState, + Boolean avoidIO, + void * * key, + void * * data, + u_int32_t * dataSize ); + +int BTScanTerminate( BTScanState * scanState, + u_int32_t * startingNode, + u_int32_t * startingRecord, + u_int32_t * recordsFound ); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* !_BTREESCANNER_H_ */ diff --git a/core/BTreeTreeOps.c b/core/BTreeTreeOps.c new file mode 100644 index 0000000..74cd04e --- /dev/null +++ b/core/BTreeTreeOps.c @@ -0,0 +1,1338 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreeTreeOps.c + + Contains: Multi-node tree operations for the BTree Module. + + Version: xxx put the technology version here xxx + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (msd) Mark Day + (DSH) Deric Horn + (djb) Don Brady + + Change History (most recent first): + + 6/1/99 djb Sync up with Mac OS 8.6. + 12/8/97 djb Radar #2200632, CollapseTree wasn't marking root node dirty. + 11/24/97 djb Radar #2005325, InsertLevel incorrectly handled root splits! + 10/17/97 msd Conditionalize DebugStrs. + 5/16/97 msd InsertNode() needs a return statement in ErrorExit. + 4/23/97 djb first checked in + + 3/17/97 DSH Conditionalize out Panic assertion for SC. + 3/3/97 djb Removed DebugStr in InsertLevel. + 2/19/97 djb Major re-write of insert code; added InsertLevel and InsertNode. 
+ 1/27/97 djb InsertTree and DeleteTree are now recursive and support variable + sized index keys. + 1/16/97 djb Removed DebugStr in SearchTree. Added initial support for + variable sized index keys. + 1/3/97 djb Changed len8 to length8. + 1/3/97 djb Added support for large keys. + 12/19/96 djb first checked in + + History applicable to original Scarecrow Design: + + <3> 10/25/96 ser Changing for new VFPI + <2> 1/22/96 dkh Add #include Memory.h + <1> 10/18/95 rst Moved from Scarecrow project. + + <12> 7/18/95 mbb Change MoveData & ClearBytes to BlockMoveData & BlockZero. + <11> 9/30/94 prp Get in sync with D2 interface changes. + <10> 7/25/94 wjk Eliminate usage of BytePtr in favor of UInt8 *. + <9> 7/22/94 wjk Convert to the new set of header files. + <8> 12/2/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <7> 11/30/93 wjk Change some Ptr's to BytePtr's in function definitions so they + agree with their prototypes. + <6> 5/21/93 gs Debug DeleteTree. Modify InsertTree for BTReplaceRecord. + <5> 5/10/93 gs Modify RotateLeft, and add DeleteTree, CollapseTree routines. + <4> 3/23/93 gs revise RotateLeft to use InsertKeyRecord instead of + InsertRecord. + <3> 3/23/93 gs Implement SplitLeft, InsertTree routine. + <2> 2/8/93 gs Implement SearchTree, and RotateLeft. + <1> 11/15/92 gs first checked in + +*/ + +#include "BTreesPrivate.h" +#include "hfs_btreeio.h" + +// +/////////////////////// Routines Internal To BTree Module /////////////////////// +// +// SearchTree +// InsertTree +// +////////////////////// Routines Internal To BTreeTreeOps.c ////////////////////// + +static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode ); + +static OSStatus CollapseTree (BTreeControlBlockPtr btreePtr, + BlockDescriptor *blockPtr ); + +static OSStatus RotateLeft (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode, + u_int16_t rightInsertIndex, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + u_int16_t *insertIndex, + u_int32_t *insertNodeNum, + Boolean *recordFit, + u_int16_t *recsRotated ); + +static Boolean RotateRecordLeft (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode ); + +static OSStatus SplitLeft (BTreeControlBlockPtr btreePtr, + BlockDescriptor *leftNode, + BlockDescriptor *rightNode, + u_int32_t rightNodeNum, + u_int16_t index, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + u_int16_t *insertIndex, + u_int32_t *insertNodeNum, + u_int16_t *recsRotated ); + + + +static OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + InsertKey *primaryKey, + InsertKey *secondaryKey, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level, + u_int32_t *insertNode ); + +static OSErr InsertNode (BTreeControlBlockPtr btreePtr, + InsertKey *key, + BlockDescriptor *rightNode, + u_int32_t node, + u_int16_t index, + u_int32_t *newNode, + u_int16_t *newIndex, + BlockDescriptor *leftNode, + Boolean *updateParent, + Boolean *insertParent, + Boolean *rootSplit ); + +static u_int16_t GetKeyLength (const BTreeControlBlock *btreePtr, + const BTreeKey *key, + Boolean forLeafNode ); + + + +//////////////////////// BTree Multi-node Tree Operations /////////////////////// + + +/*------------------------------------------------------------------------------- + +Routine: SearchTree - Search BTree for key and set up Tree Path Table. 
+ +Function: Searches BTree for specified key, setting up the Tree Path Table to + reflect the search path. + + +Input: btreePtr - pointer to control block of BTree to search + keyPtr - pointer to the key to search for + treePathTable - pointer to the tree path table to construct + +Output: nodeNum - number of the node containing the key position + iterator - BTreeIterator specifying record or insert position + +Result: noErr - key found, index is record index + fsBTRecordNotFoundErr - key not found, index is insert index + fsBTEmptyErr - key not found, return params are nil + otherwise - catastrophic failure (GetNode/ReleaseNode failed) +-------------------------------------------------------------------------------*/ + +OSStatus SearchTree (BTreeControlBlockPtr btreePtr, + BTreeKeyPtr searchKey, + TreePathTable treePathTable, + u_int32_t *nodeNum, + BlockDescriptor *nodePtr, + u_int16_t *returnIndex ) +{ + OSStatus err; + int16_t level; // Expected depth of current node + u_int32_t curNodeNum; // Current node we're searching + NodeRec nodeRec; + u_int16_t index; + Boolean keyFound; + int8_t nodeKind; // Kind of current node (index/leaf) + KeyPtr keyPtr; + u_int8_t * dataPtr; + u_int16_t dataSize; + + + curNodeNum = btreePtr->rootNode; + level = btreePtr->treeDepth; + + if (level == 0) // is the tree empty? + { + err = fsBTEmptyErr; + goto ErrorExit; + } + + //€€ for debugging... + treePathTable [0].node = 0; + treePathTable [0].index = 0; + + while (true) + { + // + // [2550929] Node number 0 is the header node. It is never a valid + // index or leaf node. If we're ever asked to search through node 0, + // something has gone wrong (typically a bad child node number, or + // we found a node full of zeroes that we thought was an index node). + // + if (curNodeNum == 0) + { +// Panic("SearchTree: curNodeNum is zero!"); + err = btBadNode; + goto ErrorExit; + } + + err = GetNode (btreePtr, curNodeNum, 0, &nodeRec); + if (err != noErr) + { + goto ErrorExit; + } + + // + // [2550929] Sanity check the node height and node type. We expect + // particular values at each iteration in the search. This checking + // quickly finds bad pointers, loops, and other damage to the + // hierarchy of the B-tree. + // + if (((BTNodeDescriptor*)nodeRec.buffer)->height != level) + { +// Panic("Incorrect node height"); + err = btBadNode; + goto ReleaseAndExit; + } + nodeKind = ((BTNodeDescriptor*)nodeRec.buffer)->kind; + if (level == 1) + { + // Nodes at level 1 must be leaves, by definition + if (nodeKind != kBTLeafNode) + { + // Panic("Incorrect node type: expected leaf"); + err = btBadNode; + goto ReleaseAndExit; + } + } + else + { + // A node at any other depth must be an index node + if (nodeKind != kBTIndexNode) + { +// Panic("Incorrect node type: expected index"); + err = btBadNode; + goto ReleaseAndExit; + } + } + + keyFound = SearchNode (btreePtr, nodeRec.buffer, searchKey, &index); + + treePathTable [level].node = curNodeNum; + + if (nodeKind == kBTLeafNode) + { + treePathTable [level].index = index; + break; // were done... + } + + if ( (keyFound != true) && (index != 0)) + --index; + + treePathTable [level].index = index; + + err = GetRecordByIndex (btreePtr, nodeRec.buffer, index, &keyPtr, &dataPtr, &dataSize); + if (err != noErr) + { + // [2550929] If we got an error, it is probably because the index was bad + // (typically a corrupt node that confused SearchNode). Invalidate the node + // so we won't accidentally use the corrupted contents. NOTE: the Mac OS 9 + // sources call this InvalidateNode. 
+ + (void) TrashNode(btreePtr, &nodeRec); + goto ErrorExit; + } + + // Get the child pointer out of this index node. We're now done with the current + // node and can continue the search with the child node. + curNodeNum = *(u_int32_t *)dataPtr; + err = ReleaseNode (btreePtr, &nodeRec); + if (err != noErr) + { + goto ErrorExit; + } + + // The child node should be at a level one less than the parent. + --level; + } + + *nodeNum = curNodeNum; + *nodePtr = nodeRec; + *returnIndex = index; + + if (keyFound) + return noErr; // searchKey found, index identifies record in node + else + return fsBTRecordNotFoundErr; // searchKey not found, index identifies insert point + +ReleaseAndExit: + (void) ReleaseNode(btreePtr, &nodeRec); + // fall into ErrorExit + +ErrorExit: + + *nodeNum = 0; + nodePtr->buffer = nil; + nodePtr->blockHeader = nil; + *returnIndex = 0; + + return err; +} + + + + +////////////////////////////////// InsertTree /////////////////////////////////// + +OSStatus InsertTree ( BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level, + Boolean replacingKey, + u_int32_t *insertNode ) +{ + InsertKey primaryKey; + OSStatus err; + + primaryKey.keyPtr = keyPtr; + primaryKey.keyLength = GetKeyLength(btreePtr, primaryKey.keyPtr, (level == 1)); + primaryKey.recPtr = recPtr; + primaryKey.recSize = recSize; + primaryKey.replacingKey = replacingKey; + primaryKey.skipRotate = false; + + err = InsertLevel (btreePtr, treePathTable, &primaryKey, nil, + targetNode, index, level, insertNode ); + + return err; + +} // End of InsertTree + + +////////////////////////////////// InsertLevel ////////////////////////////////// + +OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + InsertKey *primaryKey, + InsertKey *secondaryKey, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level, + u_int32_t *insertNode ) +{ + OSStatus err; + BlockDescriptor leftNode; + u_int32_t targetNodeNum; + u_int32_t newNodeNum; + u_int16_t newIndex; + Boolean insertParent; + Boolean updateParent; + Boolean newRoot; + InsertKey insertKey; + +#if defined(applec) && !defined(__SC__) + PanicIf ((level == 1) && (((NodeDescPtr)targetNode->buffer)->kind != kBTLeafNode), " InsertLevel: non-leaf at level 1! "); +#endif + leftNode.buffer = nil; + leftNode.blockHeader = nil; + targetNodeNum = treePathTable [level].node; + + insertParent = false; + updateParent = false; + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, targetNode); + + ////// process first insert ////// + + err = InsertNode (btreePtr, primaryKey, targetNode, targetNodeNum, index, + &newNodeNum, &newIndex, &leftNode, &updateParent, &insertParent, &newRoot ); + M_ExitOnError (err); + + if ( newRoot ) + { + // Extend the treePathTable by adding an entry for the new + // root node that references the current targetNode. + // + // If inserting the secondaryKey changes the first key of + // the target node, then we'll have to update the second + // key in the new root node. 
+ + treePathTable [level + 1].node = btreePtr->rootNode; + treePathTable [level + 1].index = 1; // 1 since we always split/rotate left + } + + if ( level == 1 ) + *insertNode = newNodeNum; + + ////// process second insert (if any) ////// + + if ( secondaryKey != nil ) + { + Boolean temp; + + err = InsertNode (btreePtr, secondaryKey, targetNode, newNodeNum, newIndex, + &newNodeNum, &newIndex, &leftNode, &updateParent, &insertParent, &temp); + M_ExitOnError (err); + } + + //////////////////////// Update Parent(s) /////////////////////////////// + + if ( insertParent || updateParent ) + { + BlockDescriptor parentNode; + u_int32_t parentNodeNum; + KeyPtr keyPtr; + u_int8_t * recPtr; + u_int16_t recSize; + + parentNode.buffer = nil; + parentNode.blockHeader = nil; + + secondaryKey = nil; + + PanicIf ( (level == btreePtr->treeDepth), " InsertLevel: unfinished insert!?"); + + ++level; + + // Get Parent Node data... + index = treePathTable [level].index; + parentNodeNum = treePathTable [level].node; + + PanicIf ( parentNodeNum == 0, " InsertLevel: parent node is zero!?"); + + err = GetNode (btreePtr, parentNodeNum, 0, &parentNode); // released as target node in next level up + M_ExitOnError (err); + ////////////////////////// Update Parent Index ////////////////////////////// + + if ( updateParent ) + { + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &parentNode); + + //€€ debug: check if ptr == targetNodeNum + GetRecordByIndex (btreePtr, parentNode.buffer, index, &keyPtr, &recPtr, &recSize); + PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, " InsertLevel: parent ptr doesn't match target node!"); + + // need to delete and re-insert this parent key/ptr + // we delete it here and it gets re-inserted in the + // InsertLevel call below. + DeleteRecord (btreePtr, parentNode.buffer, index); + + primaryKey->keyPtr = (KeyPtr) GetRecordAddress( btreePtr, targetNode->buffer, 0 ); + primaryKey->keyLength = GetKeyLength(btreePtr, primaryKey->keyPtr, false); + primaryKey->recPtr = (u_int8_t *) &targetNodeNum; + primaryKey->recSize = sizeof(targetNodeNum); + primaryKey->replacingKey = kReplaceRecord; + primaryKey->skipRotate = insertParent; // don't rotate left if we have two inserts occuring + } + + ////////////////////////// Add New Parent Index ///////////////////////////// + + if ( insertParent ) + { + InsertKey *insertKeyPtr; + + if ( updateParent ) + { + insertKeyPtr = &insertKey; + secondaryKey = &insertKey; + } + else + { + insertKeyPtr = primaryKey; + } + + insertKeyPtr->keyPtr = (KeyPtr) GetRecordAddress (btreePtr, leftNode.buffer, 0); + insertKeyPtr->keyLength = GetKeyLength(btreePtr, insertKeyPtr->keyPtr, false); + insertKeyPtr->recPtr = (u_int8_t *) &((NodeDescPtr)targetNode->buffer)->bLink; + insertKeyPtr->recSize = sizeof(u_int32_t); + insertKeyPtr->replacingKey = kInsertRecord; + insertKeyPtr->skipRotate = false; // a rotate is OK during second insert + } + + err = InsertLevel (btreePtr, treePathTable, primaryKey, secondaryKey, + &parentNode, index, level, insertNode ); + M_ExitOnError (err); + } + + err = UpdateNode (btreePtr, targetNode, 0, kLockTransaction); // all done with target + M_ExitOnError (err); + + err = UpdateNode (btreePtr, &leftNode, 0, kLockTransaction); // all done with left sibling + M_ExitOnError (err); + + return noErr; + +ErrorExit: + + (void) ReleaseNode (btreePtr, targetNode); + (void) ReleaseNode (btreePtr, &leftNode); + + Panic (" InsertLevel: an error occurred!"); + + return err; + +} // End of InsertLevel + + + +////////////////////////////////// InsertNode 
/////////////////////////////////// + +static OSErr InsertNode (BTreeControlBlockPtr btreePtr, + InsertKey *key, + + BlockDescriptor *rightNode, + u_int32_t node, + u_int16_t index, + + u_int32_t *newNode, + u_int16_t *newIndex, + + BlockDescriptor *leftNode, + Boolean *updateParent, + Boolean *insertParent, + Boolean *rootSplit ) +{ + BlockDescriptor *targetNode = NULL; + u_int32_t leftNodeNum; + u_int16_t recsRotated; + OSErr err; + Boolean recordFit; + + *rootSplit = false; + + PanicIf ( rightNode->buffer == leftNode->buffer, " InsertNode: rightNode == leftNode, huh?"); + + leftNodeNum = ((NodeDescPtr) rightNode->buffer)->bLink; + + + /////////////////////// Try Simple Insert /////////////////////////////// + + /* sanity check our left and right nodes here. */ + if (node == leftNodeNum) { + if (leftNode->buffer == NULL) { + err = fsBTInvalidNodeErr; + M_ExitOnError(err); + } + else{ + targetNode = leftNode; + } + } + else { + // we can assume right node is initialized. + targetNode = rightNode; + } + + + recordFit = InsertKeyRecord (btreePtr, targetNode->buffer, index, key->keyPtr, key->keyLength, key->recPtr, key->recSize); + + if ( recordFit ) + { + *newNode = node; + *newIndex = index; + + if ( (index == 0) && (((NodeDescPtr) targetNode->buffer)->height != btreePtr->treeDepth) ) + *updateParent = true; // the first record changed so we need to update the parent + } + + + //////////////////////// Try Rotate Left //////////////////////////////// + + if ( !recordFit && leftNodeNum > 0 ) + { + PanicIf ( leftNode->buffer != nil, " InsertNode: leftNode already acquired!"); + + if ( leftNode->buffer == nil ) + { + err = GetNode (btreePtr, leftNodeNum, 0, leftNode); // will be released by caller or a split below + M_ExitOnError (err); + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, leftNode); + } + + PanicIf ( ((NodeDescPtr) leftNode->buffer)->fLink != node, " InsertNode, RotateLeft: invalid sibling link!" ); + + if ( !key->skipRotate ) // are rotates allowed? + { + err = RotateLeft (btreePtr, leftNode->buffer, rightNode->buffer, index, key->keyPtr, key->recPtr, + key->recSize, newIndex, newNode, &recordFit, &recsRotated ); + M_ExitOnError (err); + + if ( recordFit ) + { + if ( key->replacingKey || (recsRotated > 1) || (index > 0) ) + *updateParent = true; + } + } + } + + + //////////////////////// Try Split Left ///////////////////////////////// + + if ( !recordFit ) + { + // might not have left node... + err = SplitLeft (btreePtr, leftNode, rightNode, node, index, key->keyPtr, + key->recPtr, key->recSize, newIndex, newNode, &recsRotated); + M_ExitOnError (err); + + // if we split root node - add new root + + if ( ((NodeDescPtr) rightNode->buffer)->height == btreePtr->treeDepth ) + { + err = AddNewRootNode (btreePtr, leftNode->buffer, rightNode->buffer); // Note: does not update TPT + M_ExitOnError (err); + *rootSplit = true; + } + else + { + *insertParent = true; + + if ( key->replacingKey || (recsRotated > 1) || (index > 0) ) + *updateParent = true; + } + } + + return noErr; + +ErrorExit: + (void) ReleaseNode (btreePtr, leftNode); + return err; + +} // End of InsertNode + + +/*------------------------------------------------------------------------------- +Routine: DeleteTree - One_line_description. 
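+
+		[Summary added editorially in this excerpt, derived from the code below:
+		deletes the record at "index" from targetNode.  If the node becomes
+		empty, it is unlinked from its sibling chain, cleared and freed, and the
+		parent's index record is removed by recursing one level up.  If the
+		first record of the node was deleted, the parent's key for this node is
+		replaced via InsertTree.  At the root, CollapseTree shrinks the tree
+		while the root holds only a single index record.]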
+ +Function: Brief_description_of_the_function_and_any_side_effects + +ToDo: + +Input: btreePtr - description + treePathTable - description + targetNode - description + index - description + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level ) +{ + OSStatus err; + BlockDescriptor parentNode; + BTNodeDescriptor *targetNodePtr; + u_int32_t targetNodeNum; + Boolean deleteRequired; + Boolean updateRequired; + + // XXXdbg - initialize these to null in case we get an + // error and try to exit before it's initialized + parentNode.buffer = nil; + parentNode.blockHeader = nil; + + deleteRequired = false; + updateRequired = false; + + targetNodeNum = treePathTable[level].node; + targetNodePtr = targetNode->buffer; + PanicIf (targetNodePtr == nil, "DeleteTree: targetNode has nil buffer!"); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, targetNode); + + DeleteRecord (btreePtr, targetNodePtr, index); + + //€€ coalesce remaining records? + + if ( targetNodePtr->numRecords == 0 ) // did we delete the last record? + { + BlockDescriptor siblingNode; + u_int32_t siblingNodeNum; + + deleteRequired = true; + + siblingNode.buffer = nil; + siblingNode.blockHeader = nil; + + ////////////////// Get Siblings & Update Links ////////////////////////// + + siblingNodeNum = targetNodePtr->bLink; // Left Sibling Node + if ( siblingNodeNum != 0 ) + { + err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &siblingNode); + + ((NodeDescPtr)siblingNode.buffer)->fLink = targetNodePtr->fLink; + err = UpdateNode (btreePtr, &siblingNode, 0, kLockTransaction); + M_ExitOnError (err); + } + else if ( targetNodePtr->kind == kBTLeafNode ) // update firstLeafNode + { + btreePtr->firstLeafNode = targetNodePtr->fLink; + } + + siblingNodeNum = targetNodePtr->fLink; // Right Sibling Node + if ( siblingNodeNum != 0 ) + { + err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &siblingNode); + + ((NodeDescPtr)siblingNode.buffer)->bLink = targetNodePtr->bLink; + err = UpdateNode (btreePtr, &siblingNode, 0, kLockTransaction); + M_ExitOnError (err); + } + else if ( targetNodePtr->kind == kBTLeafNode ) // update lastLeafNode + { + btreePtr->lastLeafNode = targetNodePtr->bLink; + } + + //////////////////////// Free Empty Node //////////////////////////////// + + ClearNode (btreePtr, targetNodePtr); + + err = UpdateNode (btreePtr, targetNode, 0, kLockTransaction); + M_ExitOnError (err); + + err = FreeNode (btreePtr, targetNodeNum); + M_ExitOnError (err); + } + else if ( index == 0 ) // did we delete the first record? 
+ { + updateRequired = true; // yes, so we need to update parent + } + + + if ( level == btreePtr->treeDepth ) // then targetNode->buffer is the root node + { + deleteRequired = false; + updateRequired = false; + + if ( targetNode->buffer == nil ) // then root was freed and the btree is empty + { + btreePtr->rootNode = 0; + btreePtr->treeDepth = 0; + } + else if ( ((NodeDescPtr)targetNode->buffer)->numRecords == 1 ) + { + err = CollapseTree (btreePtr, targetNode); + M_ExitOnError (err); + } + } + + + if ( updateRequired || deleteRequired ) + { + ++level; // next level + + //// Get Parent Node and index + index = treePathTable [level].index; + err = GetNode (btreePtr, treePathTable[level].node, 0, &parentNode); + M_ExitOnError (err); + + if ( updateRequired ) + { + KeyPtr keyPtr; + u_int8_t * recPtr; + u_int16_t recSize; + u_int32_t insertNode; + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &parentNode); + + //€€ debug: check if ptr == targetNodeNum + GetRecordByIndex (btreePtr, parentNode.buffer, index, &keyPtr, &recPtr, &recSize); + PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, " DeleteTree: parent ptr doesn't match targetNodeNum!!"); + + // need to delete and re-insert this parent key/ptr + DeleteRecord (btreePtr, parentNode.buffer, index); + + keyPtr = (KeyPtr) GetRecordAddress( btreePtr, targetNode->buffer, 0 ); + recPtr = (u_int8_t *) &targetNodeNum; + recSize = sizeof(targetNodeNum); + + err = InsertTree (btreePtr, treePathTable, keyPtr, recPtr, recSize, + &parentNode, index, level, kReplaceRecord, &insertNode); + M_ExitOnError (err); + } + else // deleteRequired + { + err = DeleteTree (btreePtr, treePathTable, &parentNode, index, level); + M_ExitOnError (err); + } + } + + + err = UpdateNode (btreePtr, targetNode, 0, kLockTransaction); + M_ExitOnError (err); + + return noErr; + +ErrorExit: + + (void) ReleaseNode (btreePtr, targetNode); + (void) ReleaseNode (btreePtr, &parentNode); + + return err; + +} // end DeleteTree + + + +///////////////////////////////// CollapseTree ////////////////////////////////// + +static OSStatus CollapseTree (BTreeControlBlockPtr btreePtr, + BlockDescriptor *blockPtr ) +{ + OSStatus err; + u_int32_t originalRoot; + u_int32_t nodeNum; + + originalRoot = btreePtr->rootNode; + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, blockPtr); + + while (true) + { + if ( ((NodeDescPtr)blockPtr->buffer)->numRecords > 1) + break; // this will make a fine root node + + if ( ((NodeDescPtr)blockPtr->buffer)->kind == kBTLeafNode) + break; // we've hit bottom + + nodeNum = btreePtr->rootNode; + btreePtr->rootNode = GetChildNodeNum (btreePtr, blockPtr->buffer, 0); + --btreePtr->treeDepth; + + //// Clear and Free Current Old Root Node //// + ClearNode (btreePtr, blockPtr->buffer); + err = UpdateNode (btreePtr, blockPtr, 0, kLockTransaction); + M_ExitOnError (err); + err = FreeNode (btreePtr, nodeNum); + M_ExitOnError (err); + + //// Get New Root Node + err = GetNode (btreePtr, btreePtr->rootNode, 0, blockPtr); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, blockPtr); + } + + if (btreePtr->rootNode != originalRoot) + M_BTreeHeaderDirty (btreePtr); + + err = UpdateNode (btreePtr, blockPtr, 0, kLockTransaction); // always update! 
+ M_ExitOnError (err); + + return noErr; + + +/////////////////////////////////// ErrorExit /////////////////////////////////// + +ErrorExit: + (void) ReleaseNode (btreePtr, blockPtr); + return err; +} + + + +////////////////////////////////// RotateLeft /////////////////////////////////// + +/*------------------------------------------------------------------------------- + +Routine: RotateLeft - One_line_description. + +Function: Brief_description_of_the_function_and_any_side_effects + +Algorithm: if rightIndex > insertIndex, subtract 1 for actual rightIndex + +Input: btreePtr - description + leftNode - description + rightNode - description + rightInsertIndex - description + keyPtr - description + recPtr - description + recSize - description + +Output: insertIndex + insertNodeNum - description + recordFit - description + recsRotated + +Result: noErr - success + != noErr - failure +-------------------------------------------------------------------------------*/ + +static OSStatus RotateLeft (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode, + u_int16_t rightInsertIndex, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + u_int16_t *insertIndex, + u_int32_t *insertNodeNum, + Boolean *recordFit, + u_int16_t *recsRotated ) +{ + OSStatus err; + int32_t insertSize; + int32_t nodeSize; + int32_t leftSize, rightSize; + int32_t moveSize = 0; + u_int16_t keyLength; + u_int16_t lengthFieldSize; + u_int16_t index, moveIndex; + Boolean didItFit; + + ///////////////////// Determine If Record Will Fit ////////////////////////// + + keyLength = GetKeyLength(btreePtr, keyPtr, (rightNode->kind == kBTLeafNode)); + + // the key's length field is 8-bits in HFS and 16-bits in HFS+ + if ( btreePtr->attributes & kBTBigKeysMask ) + lengthFieldSize = sizeof(u_int16_t); + else + lengthFieldSize = sizeof(u_int8_t); + + insertSize = keyLength + lengthFieldSize + recSize + sizeof(u_int16_t); + + if ( M_IsOdd (insertSize) ) + ++insertSize; // add pad byte; + + nodeSize = btreePtr->nodeSize; + + // add size of insert record to right node + rightSize = nodeSize - GetNodeFreeSize (btreePtr, rightNode) + insertSize; + leftSize = nodeSize - GetNodeFreeSize (btreePtr, leftNode); + + moveIndex = 0; + + while ( leftSize < rightSize ) + { + if ( moveIndex < rightInsertIndex ) + { + moveSize = GetRecordSize (btreePtr, rightNode, moveIndex) + 2; + } + else if ( moveIndex == rightInsertIndex ) + { + moveSize = insertSize; + } + else // ( moveIndex > rightInsertIndex ) + { + moveSize = GetRecordSize (btreePtr, rightNode, moveIndex - 1) + 2; + } + + leftSize += moveSize; + rightSize -= moveSize; + ++moveIndex; + } + + if ( leftSize > nodeSize ) // undo last move + { + leftSize -= moveSize; + rightSize += moveSize; + --moveIndex; + } + + if ( rightSize > nodeSize ) // record won't fit - failure, but not error + { + *insertIndex = 0; + *insertNodeNum = 0; + *recordFit = false; + *recsRotated = 0; + + return noErr; + } + + // we've found balance point, moveIndex == number of records moved into leftNode + + + //////////////////////////// Rotate Records ///////////////////////////////// + + *recsRotated = moveIndex; + *recordFit = true; + index = 0; + + while ( index < moveIndex ) + { + if ( index == rightInsertIndex ) // insert new record in left node + { + u_int16_t leftInsertIndex; + + leftInsertIndex = leftNode->numRecords; + + didItFit = InsertKeyRecord (btreePtr, leftNode, leftInsertIndex, + keyPtr, keyLength, recPtr, recSize); + if ( !didItFit ) + { + Panic ("RotateLeft: InsertKeyRecord 
(left) returned false!"); + err = fsBTBadRotateErr; + goto ErrorExit; + } + + *insertIndex = leftInsertIndex; + *insertNodeNum = rightNode->bLink; + } + else + { + didItFit = RotateRecordLeft (btreePtr, leftNode, rightNode); + if ( !didItFit ) + { + Panic ("RotateLeft: RotateRecordLeft returned false!"); + err = fsBTBadRotateErr; + goto ErrorExit; + } + } + + ++index; + } + + if ( moveIndex <= rightInsertIndex ) // then insert new record in right node + { + rightInsertIndex -= index; // adjust for records already rotated + + didItFit = InsertKeyRecord (btreePtr, rightNode, rightInsertIndex, + keyPtr, keyLength, recPtr, recSize); + if ( !didItFit ) + { + Panic ("RotateLeft: InsertKeyRecord (right) returned false!"); + err = fsBTBadRotateErr; + goto ErrorExit; + } + + *insertIndex = rightInsertIndex; + *insertNodeNum = leftNode->fLink; + } + + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + *insertIndex = 0; + *insertNodeNum = 0; + *recordFit = false; + *recsRotated = 0; + + return err; +} + + + +/////////////////////////////////// SplitLeft /////////////////////////////////// + +static OSStatus SplitLeft (BTreeControlBlockPtr btreePtr, + BlockDescriptor *leftNode, + BlockDescriptor *rightNode, + u_int32_t rightNodeNum, + u_int16_t index, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + u_int16_t *insertIndex, + u_int32_t *insertNodeNum, + u_int16_t *recsRotated ) +{ + OSStatus err; + NodeDescPtr left, right; + u_int32_t newNodeNum; + Boolean recordFit; + + + ///////////////////////////// Compare Nodes ///////////////////////////////// + + right = rightNode->buffer; + left = leftNode->buffer; + + PanicIf ( right->bLink != 0 && left == 0, " SplitLeft: left sibling missing!?" ); + + /* type should be kBTLeafNode or kBTIndexNode */ + + if ( (right->height == 1) && (right->kind != kBTLeafNode) ) + return fsBTInvalidNodeErr; + + if ( left != nil ) + { + if ( left->fLink != rightNodeNum ) + return fsBTInvalidNodeErr; //€€ E_BadSibling ? + + if ( left->height != right->height ) + return fsBTInvalidNodeErr; //€€ E_BadNodeHeight ? + + if ( left->kind != right->kind ) + return fsBTInvalidNodeErr; //€€ E_BadNodeType ? + } + + + ///////////////////////////// Allocate Node ///////////////////////////////// + + err = AllocateNode (btreePtr, &newNodeNum); + M_ExitOnError (err); + + + /////////////// Update Forward Link In Original Left Node /////////////////// + + if ( left != nil ) + { + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, leftNode); + + left->fLink = newNodeNum; + err = UpdateNode (btreePtr, leftNode, 0, kLockTransaction); + M_ExitOnError (err); + } + + + /////////////////////// Initialize New Left Node //////////////////////////// + + err = GetNewNode (btreePtr, newNodeNum, leftNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, leftNode); + + left = leftNode->buffer; + left->fLink = rightNodeNum; + + + // Steal Info From Right Node + + left->bLink = right->bLink; + left->kind = right->kind; + left->height = right->height; + + right->bLink = newNodeNum; // update Right bLink + + if ( (left->kind == kBTLeafNode) && (left->bLink == 0) ) + { + // if we're adding a new first leaf node - update BTreeInfoRec + + btreePtr->firstLeafNode = newNodeNum; + M_BTreeHeaderDirty (btreePtr); //€€ AllocateNode should have set the bit already... 
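+
+		// [Editorial note: comment added in this excerpt.]  left->bLink was
+		// copied from the right node above, so bLink == 0 means the right
+		// node used to be the first leaf; the newly allocated left sibling
+		// now takes its place as firstLeafNode.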
+ } + + ////////////////////////////// Rotate Left ////////////////////////////////// + + err = RotateLeft (btreePtr, left, right, index, keyPtr, recPtr, recSize, + insertIndex, insertNodeNum, &recordFit, recsRotated); + + M_ExitOnError (err); + + return noErr; + +ErrorExit: + + (void) ReleaseNode (btreePtr, leftNode); + (void) ReleaseNode (btreePtr, rightNode); + + //€€ Free new node if allocated? + + *insertIndex = 0; + *insertNodeNum = 0; + *recsRotated = 0; + + return err; +} + + + +/////////////////////////////// RotateRecordLeft //////////////////////////////// + +static Boolean RotateRecordLeft (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode ) +{ + u_int16_t size; + u_int8_t * recPtr; + Boolean recordFit; + + size = GetRecordSize (btreePtr, rightNode, 0); + recPtr = GetRecordAddress (btreePtr, rightNode, 0); + + recordFit = InsertRecord (btreePtr, leftNode, leftNode->numRecords, recPtr, size); + + if ( !recordFit ) + return false; + + DeleteRecord (btreePtr, rightNode, 0); + + return true; +} + + +//////////////////////////////// AddNewRootNode ///////////////////////////////// + +static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, + NodeDescPtr leftNode, + NodeDescPtr rightNode ) +{ + OSStatus err; + BlockDescriptor rootNode; + u_int32_t rootNum; + KeyPtr keyPtr; + Boolean didItFit; + u_int16_t keyLength; + + rootNode.buffer = nil; + rootNode.blockHeader = nil; + + PanicIf (leftNode == nil, "AddNewRootNode: leftNode == nil"); + PanicIf (rightNode == nil, "AddNewRootNode: rightNode == nil"); + + + /////////////////////// Initialize New Root Node //////////////////////////// + + err = AllocateNode (btreePtr, &rootNum); + M_ExitOnError (err); + + err = GetNewNode (btreePtr, rootNum, &rootNode); + M_ExitOnError (err); + + // XXXdbg + ModifyBlockStart(btreePtr->fileRefNum, &rootNode); + + ((NodeDescPtr)rootNode.buffer)->kind = kBTIndexNode; + ((NodeDescPtr)rootNode.buffer)->height = ++btreePtr->treeDepth; + + + ///////////////////// Insert Left Node Index Record ///////////////////////// + + keyPtr = (KeyPtr) GetRecordAddress (btreePtr, leftNode, 0); + keyLength = GetKeyLength(btreePtr, keyPtr, false); + + didItFit = InsertKeyRecord ( btreePtr, rootNode.buffer, 0, keyPtr, keyLength, + (u_int8_t *) &rightNode->bLink, 4 ); + + PanicIf ( !didItFit, "AddNewRootNode:InsertKeyRecord failed for left index record"); + + + //////////////////// Insert Right Node Index Record ///////////////////////// + + keyPtr = (KeyPtr) GetRecordAddress (btreePtr, rightNode, 0); + keyLength = GetKeyLength(btreePtr, keyPtr, false); + + didItFit = InsertKeyRecord ( btreePtr, rootNode.buffer, 1, keyPtr, keyLength, + (u_int8_t *) &leftNode->fLink, 4 ); + + PanicIf ( !didItFit, "AddNewRootNode:InsertKeyRecord failed for right index record"); + + + /////////////////////////// Release Root Node /////////////////////////////// + + err = UpdateNode (btreePtr, &rootNode, 0, kLockTransaction); + M_ExitOnError (err); + + // update BTreeInfoRec + + btreePtr->rootNode = rootNum; + M_BTreeHeaderDirty(btreePtr); + + return noErr; + + + ////////////////////////////// Error Exit /////////////////////////////////// + +ErrorExit: + + return err; +} + + +static u_int16_t GetKeyLength ( const BTreeControlBlock *btreePtr, const BTreeKey *key, Boolean forLeafNode ) +{ + u_int16_t length; + + if ( forLeafNode || btreePtr->attributes & kBTVariableIndexKeysMask ) + length = KeyLength (btreePtr, key); // just use actual key length + else + length = btreePtr->maxKeyLength; // fixed sized index key 
(i.e. HFS) //€€ shouldn't we clear the pad bytes? + + return length; +} + diff --git a/core/BTreeWrapper.c b/core/BTreeWrapper.c new file mode 100644 index 0000000..12ce54d --- /dev/null +++ b/core/BTreeWrapper.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2000, 2002, 2005-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include "BTreesPrivate.h" +#include +#include + + +// local routines +static OSErr CheckBTreeKey(const BTreeKey *key, const BTreeControlBlock *btcb); + +#if DEBUG +static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, u_int16_t recordSize); +#endif + +OSErr ReplaceBTreeRecord(FileReference refNum, const void* key, u_int32_t hint, void *newData, u_int16_t dataSize, u_int32_t *newHint) +{ + FSBufferDescriptor btRecord; + struct BTreeIterator *iterator = NULL; + FCB *fcb; + BTreeControlBlock *btcb; + OSStatus result; + + iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + fcb = GetFileControlBlock(refNum); + btcb = (BTreeControlBlock*) fcb->fcbBTCBPtr; + + btRecord.bufferAddress = newData; + btRecord.itemSize = dataSize; + btRecord.itemCount = 1; + + iterator->hint.nodeNum = hint; + + result = CheckBTreeKey((const BTreeKey *) key, btcb); + if (result) { + goto ErrorExit; + } + + BlockMoveData(key, &iterator->key, CalcKeySize(btcb, (const BTreeKey *) key)); //€€ should we range check against maxkeylen? 
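+
+	// [Editorial note: comment added in this excerpt.]  CalcKeySize (see
+	// BTreesPrivate.h) copies the key length field plus the key bytes:
+	// length16 + 2 for trees with kBTBigKeysMask set (HFS+), otherwise
+	// length8 + 1 (HFS).  CheckBTreeKey above has already verified the
+	// length against btcb->maxKeyLength.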
+ +#if DEBUG + if ( !ValidHFSRecord(newData, btcb, dataSize) ) + DebugStr("ReplaceBTreeRecord: bad record?"); +#endif + + result = BTReplaceRecord( fcb, iterator, &btRecord, dataSize ); + + *newHint = iterator->hint.nodeNum; + +ErrorExit: + + hfs_free(iterator, sizeof(*iterator)); + return result; +} + + + +static OSErr CheckBTreeKey(const BTreeKey *key, const BTreeControlBlock *btcb) +{ + u_int16_t keyLen; + + if ( btcb->attributes & kBTBigKeysMask ) + keyLen = key->length16; + else + keyLen = key->length8; + + if ( (keyLen < 6) || (keyLen > btcb->maxKeyLength) ) + { + hfs_debug("CheckBTreeKey: bad key length!"); + return fsBTInvalidKeyLengthErr; + } + + return noErr; +} + +#if DEBUG + +static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, u_int16_t recordSize) +{ + u_int32_t cNodeID; + + if (btcb->maxKeyLength == kHFSPlusExtentKeyMaximumLength ) + { + return ( recordSize == sizeof(HFSPlusExtentRecord) ); + } +#if CONFIG_HFS_STD + else if ( btcb->maxKeyLength == kHFSExtentKeyMaximumLength ) + { + return ( recordSize == sizeof(HFSExtentRecord) ); + } +#endif + + else // Catalog record + { + const CatalogRecord *catalogRecord = (const CatalogRecord*) record; + + switch(catalogRecord->recordType) + { + +#if CONFIG_HFS_STD + /* + * HFS standard File/folder records and File/Folder Thread records + * are only valid on configs that support HFS standard. + */ + case kHFSFolderRecord: + { + if ( recordSize != sizeof(HFSCatalogFolder) ) + return false; + if ( catalogRecord->hfsFolder.flags != 0 ) + return false; + if ( catalogRecord->hfsFolder.valence > 0x7FFF ) + return false; + + cNodeID = catalogRecord->hfsFolder.folderID; + + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + } + break; + + case kHFSFileRecord: + { + const HFSExtentDescriptor *dataExtent; + const HFSExtentDescriptor *rsrcExtent; + + if ( recordSize != sizeof(HFSCatalogFile) ) + return false; + if ( (catalogRecord->hfsFile.flags & ~(0x83)) != 0 ) + return false; + + cNodeID = catalogRecord->hfsFile.fileID; + + if ( cNodeID < 16 ) + return false; + + // make sure 0 ¾ LEOF ¾ PEOF for both forks + + if ( catalogRecord->hfsFile.dataLogicalSize < 0 ) + return false; + if ( catalogRecord->hfsFile.dataPhysicalSize < catalogRecord->hfsFile.dataLogicalSize ) + return false; + if ( catalogRecord->hfsFile.rsrcLogicalSize < 0 ) + return false; + if ( catalogRecord->hfsFile.rsrcPhysicalSize < catalogRecord->hfsFile.rsrcLogicalSize ) + return false; + + dataExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.dataExtents; + rsrcExtent = (const HFSExtentDescriptor*) &catalogRecord->hfsFile.rsrcExtents; + +#if 0 + for (i = 0; i < kHFSExtentDensity; ++i) + { + if ( (dataExtent[i].blockCount > 0) && (dataExtent[i].startBlock == 0) ) + return false; + if ( (rsrcExtent[i].blockCount > 0) && (rsrcExtent[i].startBlock == 0) ) + return false; + } +#endif + } + break; + + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + { + if ( recordSize != sizeof(HFSCatalogThread) ) + return false; + + cNodeID = catalogRecord->hfsThread.parentID; + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + + if ( (catalogRecord->hfsThread.nodeName[0] == 0) || + (catalogRecord->hfsThread.nodeName[0] > 31) ) + return false; + } + break; +#endif + + case kHFSPlusFolderRecord: + { + if ( recordSize != sizeof(HFSPlusCatalogFolder) ) + return false; + if ( catalogRecord->hfsPlusFolder.flags != 0 ) + return false; + if ( catalogRecord->hfsPlusFolder.valence > 0x7FFF ) + return false; + + 
cNodeID = catalogRecord->hfsPlusFolder.folderID; + + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + } + break; + + case kHFSPlusFileRecord: + { +// u_int16_t i; + const HFSPlusExtentDescriptor *dataExtent; + const HFSPlusExtentDescriptor *rsrcExtent; + + if ( recordSize != sizeof(HFSPlusCatalogFile) ) + return false; + if ( (catalogRecord->hfsPlusFile.flags & ~(0x83)) != 0 ) + return false; + + cNodeID = catalogRecord->hfsPlusFile.fileID; + + if ( cNodeID < 16 ) + return false; + + // make sure 0 ¾ LEOF ¾ PEOF for both forks + + dataExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.dataFork.extents; + rsrcExtent = (const HFSPlusExtentDescriptor*) &catalogRecord->hfsPlusFile.resourceFork.extents; + +#if 0 + for (i = 0; i < kHFSPlusExtentDensity; ++i) + { + if ( (dataExtent[i].blockCount > 0) && (dataExtent[i].startBlock == 0) ) + return false; + if ( (rsrcExtent[i].blockCount > 0) && (rsrcExtent[i].startBlock == 0) ) + return false; + } +#endif + } + break; + + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + { + if ( recordSize > sizeof(HFSPlusCatalogThread) || recordSize < (sizeof(HFSPlusCatalogThread) - sizeof(HFSUniStr255))) + return false; + + cNodeID = catalogRecord->hfsPlusThread.parentID; + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + + if ( (catalogRecord->hfsPlusThread.nodeName.length == 0) || + (catalogRecord->hfsPlusThread.nodeName.length > 255) ) + return false; + } + break; + + default: + return false; + } + } + + return true; // record appears to be OK +} + +#endif // DEBUG diff --git a/core/BTreesInternal.h b/core/BTreesInternal.h new file mode 100644 index 0000000..11a7842 --- /dev/null +++ b/core/BTreesInternal.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreesInternal.h + + Contains: IPI to File Manager B-tree + + Version: HFS Plus 1.0 + + Copyright: (c) 1996-1998 by Apple Inc., all rights reserved. 
+ + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (msd) Mark Day + (DSH) Deric Horn + (djb) Don Brady + + Change History (most recent first): + + 9/22/99 ser Added prototypes for BTGetLastSync and BTSetLastSync + 6/22/98 djb Add ERR_BASE to btree error codes to make them negative (for MacOS X only). + + 7/28/97 msd Add enum for fsBTTimeOutErr. + 7/25/97 DSH Added heuristicHint as parameter to BTSearchRecord. + 7/24/97 djb Add blockReadFromDisk flag to BlockDescriptor. Callbacks now use + a file refNum instead of an FCB. + 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name + collision + 6/2/97 DSH Added SetEndOfForkProc() prototype, so Attributes.c can call it + directly. + 5/19/97 djb kMaxKeyLength is now 520. + 4/28/97 djb first checked in + + 3/17/97 DSH Remove Key Comparison prototype, already in FilesInternal.h. + 2/19/97 djb Add SetBlockSizeProcPtr. Add blockSize field to BlockDescriptor. + Remove E_ type error enums. + 1/27/97 djb Include Types.h and FilesInternal.h. + 1/13/97 djb Added kBTreeCurrentRecord for BTIterateRecord. + 1/3/97 djb Added support for large keys. + 12/19/96 djb first checked in + +*/ + +#ifndef __BTREESINTERNAL__ +#define __BTREESINTERNAL__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#ifndef __FILEMGRINTERNAL__ +#include "FileMgrInternal.h" +#endif + +enum { + fsBTInvalidHeaderErr = btBadHdr, + fsBTBadRotateErr = dsBadRotate, + fsBTInvalidNodeErr = btBadNode, + fsBTRecordTooLargeErr = btNoFit, + fsBTRecordNotFoundErr = btNotFound, + fsBTDuplicateRecordErr = btExists, + fsBTFullErr = btNoSpaceAvail, + + fsBTInvalidFileErr = ERR_BASE + 0x0302, /* no BTreeCB has been allocated for fork*/ + fsBTrFileAlreadyOpenErr = ERR_BASE + 0x0303, + fsBTInvalidIteratorErr = ERR_BASE + 0x0308, + fsBTEmptyErr = ERR_BASE + 0x030A, + fsBTNoMoreMapNodesErr = ERR_BASE + 0x030B, + fsBTBadNodeSize = ERR_BASE + 0x030C, + fsBTBadNodeType = ERR_BASE + 0x030D, + fsBTInvalidKeyLengthErr = ERR_BASE + 0x030E, + fsBTStartOfIterationErr = ERR_BASE + 0x0353, + fsBTEndOfIterationErr = ERR_BASE + 0x0354, + fsBTUnknownVersionErr = ERR_BASE + 0x0355, + fsBTTreeTooDeepErr = ERR_BASE + 0x0357, + fsIteratorExitedScopeErr = ERR_BASE + 0x0A02, /* iterator exited the scope*/ + fsIteratorScopeExceptionErr = ERR_BASE + 0x0A03, /* iterator is undefined due to error or movement of scope locality*/ + fsUnknownIteratorMovementErr = ERR_BASE + 0x0A04, /* iterator movement is not defined*/ + fsInvalidIterationMovmentErr = ERR_BASE + 0x0A05, /* iterator movement is invalid in current context*/ + fsClientIDMismatchErr = ERR_BASE + 0x0A06, /* wrong client process ID*/ + fsEndOfIterationErr = ERR_BASE + 0x0A07, /* there were no objects left to return on iteration*/ + fsBTTimeOutErr = ERR_BASE + 0x0A08 /* BTree scan interrupted -- no time left for physical I/O */ +}; + +struct BlockDescriptor{ + void *buffer; + void *blockHeader; + daddr64_t blockNum; /* logical block number (used by hfs_swap_BTNode) */ + ByteCount blockSize; + Boolean blockReadFromDisk; + Byte isModified; // XXXdbg - for journaling + Byte reserved[2]; +}; +typedef struct BlockDescriptor BlockDescriptor; +typedef BlockDescriptor *BlockDescPtr; + + +struct FSBufferDescriptor { + void * bufferAddress; + ByteCount itemSize; + ItemCount itemCount; +}; +typedef struct FSBufferDescriptor FSBufferDescriptor; + +typedef FSBufferDescriptor *FSBufferDescriptorPtr; + + +/* + Fork Level Access Method Block get options +*/ +enum { + kGetBlock = 0x00000000, + 
kGetBlockHint = 0x00000001, // if set, the block is being looked up using hint + kForceReadBlock = 0x00000002, //€€ how does this relate to Read/Verify? Do we need this? + kGetEmptyBlock = 0x00000008 +}; +typedef u_int32_t GetBlockOptions; + +/* + Fork Level Access Method Block release options +*/ +enum { + kReleaseBlock = 0x00000000, + kForceWriteBlock = 0x00000001, + kMarkBlockDirty = 0x00000002, + kTrashBlock = 0x00000004, + kLockTransaction = 0x00000100 +}; +typedef u_int32_t ReleaseBlockOptions; + +typedef u_int64_t FSSize; +typedef u_int32_t ForkBlockNumber; + +/*============================================================================ + Fork Level Buffered I/O Access Method +============================================================================*/ + +typedef OSStatus (* GetBlockProcPtr) (FileReference fileRefNum, + u_int32_t blockNum, + GetBlockOptions options, + BlockDescriptor *block ); + + +typedef OSStatus (* ReleaseBlockProcPtr) (FileReference fileRefNum, + BlockDescPtr blockPtr, + ReleaseBlockOptions options ); + +typedef OSStatus (* SetEndOfForkProcPtr) (FileReference fileRefNum, + FSSize minEOF, + FSSize maxEOF ); + +typedef OSStatus (* SetBlockSizeProcPtr) (FileReference fileRefNum, + ByteCount blockSize, + ItemCount minBlockCount ); + +OSStatus SetEndOfForkProc ( FileReference fileRefNum, FSSize minEOF, FSSize maxEOF ); + + +/* + B*Tree Information Version +*/ + +enum BTreeInformationVersion{ + kBTreeInfoVersion = 0 +}; + +/* + B*Tree Iteration Operation Constants +*/ + +enum BTreeIterationOperations{ + kBTreeFirstRecord, + kBTreeNextRecord, + kBTreePrevRecord, + kBTreeLastRecord, + kBTreeCurrentRecord +}; +typedef u_int16_t BTreeIterationOperation; + + +/* + Btree types: 0 is HFS CAT/EXT file, 1~127 are AppleShare B*Tree files, 128~254 unused + hfsBtreeType EQU 0 ; control file + validBTType EQU $80 ; user btree type starts from 128 + userBT1Type EQU $FF ; 255 is our Btree type. Used by BTInit and BTPatch +*/ + +enum BTreeTypes{ + kHFSBTreeType = 0, // control file + kUserBTreeType = 128, // user btree type starts from 128 + kReservedBTreeType = 255 // +}; + +#define kBTreeHeaderUserBytes 128 + + +typedef BTreeKey *BTreeKeyPtr; + + +/* + BTreeInfoRec Structure - for BTGetInformation +*/ +struct BTreeInfoRec{ + u_int16_t version; + u_int16_t nodeSize; + u_int16_t maxKeyLength; + u_int16_t treeDepth; + u_int32_t lastfsync; /* Last time that this was fsynced */ + ItemCount numRecords; + ItemCount numNodes; + ItemCount numFreeNodes; + u_int8_t keyCompareType; + u_int8_t reserved[3]; +}; +typedef struct BTreeInfoRec BTreeInfoRec; +typedef BTreeInfoRec *BTreeInfoPtr; + +/* + BTreeHint can never be exported to the outside. Use u_int32_t BTreeHint[4], + u_int8_t BTreeHint[16], etc. 
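+
+	(Editorial clarification added in this excerpt: callers outside the B-tree
+	layer should treat the hint as an opaque 16-byte blob; the struct below is
+	4 + 4 + 2 + 2 + 4 = 16 bytes, assuming ItemCount is 32-bit, as the
+	u_int32_t BTreeHint[4] and u_int8_t BTreeHint[16] suggestions above imply.)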
+ */ +struct BTreeHint{ + ItemCount writeCount; + u_int32_t nodeNum; // node the key was last seen in + u_int16_t index; // index then key was last seen at + u_int16_t reserved1; + u_int32_t reserved2; +}; +typedef struct BTreeHint BTreeHint; +typedef BTreeHint *BTreeHintPtr; + +/* + BTree Iterator +*/ +struct BTreeIterator{ + BTreeHint hint; + u_int16_t version; + u_int16_t reserved; + u_int32_t hitCount; // Total number of leaf records hit + u_int32_t maxLeafRecs; // Max leaf records over iteration + BTreeKey key; +}; +typedef struct BTreeIterator BTreeIterator; +typedef BTreeIterator *BTreeIteratorPtr; + + +/*============================================================================ + B*Tree SPI +============================================================================*/ + +/* + Key Comparison Function ProcPtr Type - for BTOpenPath +*/ +//typedef int32_t (* KeyCompareProcPtr)(BTreeKeyPtr a, BTreeKeyPtr b); + + +typedef int32_t (* IterateCallBackProcPtr)(BTreeKeyPtr key, void * record, void * state); + + +extern OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc); + +extern OSStatus BTClosePath (FCB *filePtr ); + + +extern OSStatus BTSearchRecord (FCB *filePtr, + BTreeIterator *searchIterator, + FSBufferDescriptor *btRecord, + u_int16_t *recordLen, + BTreeIterator *resultIterator ); + +extern OSStatus BTIterateRecord (FCB *filePtr, + BTreeIterationOperation operation, + BTreeIterator *iterator, + FSBufferDescriptor *btRecord, + u_int16_t *recordLen ); + + +extern OSStatus BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator *iterator, + IterateCallBackProcPtr callBackProc, void * callBackState); + +extern OSStatus BTInsertRecord (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *btrecord, + u_int16_t recordLen ); + +extern OSStatus BTReplaceRecord (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *btRecord, + u_int16_t recordLen ); + +extern OSStatus BTUpdateRecord (FCB *filePtr, + BTreeIterator *iterator, + IterateCallBackProcPtr callBackProc, + void *callBackState ); + +extern OSStatus BTDeleteRecord (FCB *filePtr, + BTreeIterator *iterator ); + +extern OSStatus BTGetInformation (FCB *filePtr, + u_int16_t vers, + BTreeInfoRec *info ); + +extern OSStatus BTIsDirty(FCB *filePtr); + +extern OSStatus BTFlushPath (FCB *filePtr ); + +extern OSStatus BTReloadData (FCB *filePtr); + +extern OSStatus BTInvalidateHint (BTreeIterator *iterator ); + +extern OSStatus BTGetLastSync (FCB *filePtr, + u_int32_t *lastfsync ); + +extern OSStatus BTSetLastSync (FCB *filePtr, + u_int32_t lastfsync ); + +extern OSStatus BTHasContiguousNodes(FCB *filePtr); + +extern OSStatus BTGetUserData(FCB *filePtr, void * dataPtr, int dataSize); + +extern OSStatus BTSetUserData(FCB *filePtr, void * dataPtr, int dataSize); + +/* B-tree node reserve routines. */ +extern void BTReserveSetup(void); + +extern int BTReserveSpace(FCB *file, int operations, void * data); + +extern int BTReleaseReserve(FCB *file, void * data); + +extern int BTZeroUnusedNodes(FCB *file); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif // __BTREESINTERNAL__ diff --git a/core/BTreesPrivate.h b/core/BTreesPrivate.h new file mode 100644 index 0000000..260de38 --- /dev/null +++ b/core/BTreesPrivate.h @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: BTreesPrivate.h + + Contains: Private interface file for the BTree Module. + + Version: xxx put the technology version here xxx + + Written by: Gordon Sheridan and Bill Bruffey + + Copyright: (c) 1992-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: Mark Day + + Technology: File Systems + + Writers: + + (msd) Mark Day + (DSH) Deric Horn + (djb) Don Brady + (ser) Scott Roberts + (dkh) Dave Heller + + Change History (most recent first): + 3/19/99 djb Disable MoveRecordsLeft/Right macros since bcopy is broken. + + 8/10/98 djb Removed unused BTreeIterator from BTreeControlBlock, fixed alignment. + + 9/4/97 djb Convert MoveRecordsLeft and GetLeftSiblingNode to macros. + 7/24/97 djb Add macro for GetRecordAddress (was a function before). + 7/21/97 msd GetRecordByIndex now returns an OSStatus. + 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name + collision + 4/23/97 djb first checked in + + 3/17/97 DSH Added a refCon field to BTreeControlBlock, for DFA use, to point + to additional data. Fixed Panic macros for use with SC. + 2/19/97 djb Add InsertKey struct. Moved on-disk definitions to + HFSBTreesPriv.h + 1/27/97 djb InsertTree and DeleteTree are now recursive and support variable + sized index keys. + 1/15/97 djb Move GetFileRefNumFromFCB macro to FilesInternal.h. Added + kBTVariableIndexKeysMask. + 1/3/97 djb Added support for large keys. + 12/19/96 djb first checked in + + History applicable to original Scarecrow Design: + + <7> 10/25/96 ser Changing for new VFPI + <6> 10/18/96 ser Converting over VFPI changes + <5> 9/17/96 dkh More BTree statistics + <4> 9/16/96 dkh Revised BTree statistics + <3> 6/20/96 dkh Radar #1358740. Switch from using Pools to debug MemAllocators. + <2> 12/7/95 dkh D10E2 build. Changed usage of Ref data type to LogicalAddress. + <1> 10/18/95 rst Moved from Scarecrow project. + + <19> 11/22/94 djb Add prototype for GetMapNode + <18> 11/16/94 prp Add IsItAHint routine prototype. + <17> 9/30/94 prp Get in sync with D2 interface changes. + <16> 7/25/94 wjk Eliminate usage of BytePtr in favor of UInt8 *. + <15> 7/22/94 wjk Convert to the new set of header files. 
+ <14> 5/31/94 srs Moved Btree types to public interface + <13> 12/9/93 wjk Add 68k alignment pragma's around persistent structures. + <12> 11/30/93 wjk Move from Makefiles to BuildFiles. Fit into the ModernOS and + NRCmds environments. + <11> 11/23/93 wjk Changes required to compile on the RS6000. + <10> 8/30/93 CH Removed the M_ExitOnError and M_ReturnErrorIf macros which were + already defined in FileSystemPriv.h (included here). + <9> 8/30/93 CH Added parens around the M_ReturnErrorIf macro. + <8> 5/21/93 gs Add kBadClose flag. Add some prototypes for internal routines. + <7> 5/10/93 gs Change Ptr to BytePtr. Move BTreeTypes to BTree.h. Add + DeleteTree prototype. + <6> 3/23/93 gs Remove mysterious "flags" field from HeaderRec structure. Move + prototypes of private functions to top of respective source + files. + <5> 2/8/93 gs Update to use FSAgent.h Get/Release/SetEOF/SetBlockSize + procPtrs. Add UpdateNode routine. + <4> 12/10/92 gs Add Key Descriptor function declarations. + <3> 12/8/92 gs Add HeaderRec structure and incorporate review feedback. + <2> 12/2/92 gs Add GetNode and ReleaseNode callback procptrs to BTree CB, and + add internal function declarations. + <1> 11/15/92 gs first checked in + +*/ + +#ifndef __BTREESPRIVATE__ +#define __BTREESPRIVATE__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include "hfs_macos_defs.h" + +#ifndef __FILEMGRINTERNAL__ +#include "FileMgrInternal.h" +#endif + +#ifndef __BTREESINTERNAL__ +#include "BTreesInternal.h" +#endif + + +/////////////////////////////////// Constants /////////////////////////////////// + +#define kBTreeVersion 1 +#define kMaxTreeDepth 16 + + +#define kHeaderNodeNum 0 +#define kKeyDescRecord 1 + + +// Header Node Record Offsets +enum { + kHeaderRecOffset = 0x000E, + kKeyDescRecOffset = 0x0078, + kHeaderMapRecOffset = 0x00F8 +}; + +#define kMinNodeSize 512 + +#define kMinRecordSize 6 + // where is minimum record size enforced? 
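+
+// [Editorial illustration added in this excerpt -- not part of the original
+// change.]  The header node record offsets above appear to follow directly
+// from the on-disk layout of node 0: a BTNodeDescriptor, then the BTHeaderRec,
+// then kBTreeHeaderUserBytes of user data, then the initial map record.  The
+// struct sizes used below (14 and 106 bytes) are assumed from hfs_format.h,
+// which is not shown in this excerpt; the sketch only checks the arithmetic.
+#if 0 /* illustrative sketch only */
+#include <assert.h>
+
+int main(void)
+{
+	const int kNodeDescSize  = 14;	/* assumed sizeof(BTNodeDescriptor) */
+	const int kHeaderRecSize = 106;	/* assumed sizeof(BTHeaderRec)      */
+	const int kUserBytes     = 128;	/* kBTreeHeaderUserBytes            */
+
+	assert(kNodeDescSize                               == 0x000E);	/* kHeaderRecOffset    */
+	assert(kNodeDescSize + kHeaderRecSize              == 0x0078);	/* kKeyDescRecOffset   */
+	assert(kNodeDescSize + kHeaderRecSize + kUserBytes == 0x00F8);	/* kHeaderMapRecOffset */
+	return 0;
+}
+#endif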
+ +// miscellaneous BTree constants +enum { + kOffsetSize = 2 +}; + +// Insert Operations +typedef enum { + kInsertRecord = 0, + kReplaceRecord = 1 +} InsertType; + +// illegal string attribute bits set in mask +#define kBadStrAttribMask 0xCF + + + +//////////////////////////////////// Macros ///////////////////////////////////// + +#define M_NodesInMap(mapSize) ((mapSize) << 3) + +#define M_ClearBitNum(integer,bitNumber) ((integer) &= (~(1<<(bitNumber)))) +#define M_SetBitNum(integer,bitNumber) ((integer) |= (1<<(bitNumber))) +#define M_IsOdd(integer) (((integer) & 1) != 0) +#define M_IsEven(integer) (((integer) & 1) == 0) + +#define M_MapRecordSize(nodeSize) (nodeSize - sizeof (BTNodeDescriptor) - 6) +#define M_HeaderMapRecordSize(nodeSize) (nodeSize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - 128 - 8) + +#define M_SWAP_BE16_ClearBitNum(integer,bitNumber) ((integer) &= SWAP_BE16(~(1<<(bitNumber)))) +#define M_SWAP_BE16_SetBitNum(integer,bitNumber) ((integer) |= SWAP_BE16(1<<(bitNumber))) + +///////////////////////////////////// Types ///////////////////////////////////// + +typedef struct BTreeControlBlock { // fields specific to BTree CBs + + u_int8_t keyCompareType; /* Key string Comparison Type */ + u_int8_t btreeType; + u_int16_t treeDepth; + FileReference fileRefNum; // refNum of btree file + KeyCompareProcPtr keyCompareProc; + u_int32_t rootNode; + u_int32_t leafRecords; + u_int32_t firstLeafNode; + u_int32_t lastLeafNode; + u_int16_t nodeSize; + u_int16_t maxKeyLength; + u_int32_t totalNodes; + u_int32_t freeNodes; + + u_int16_t reserved3; // 4-byte alignment + + // new fields + int16_t version; + u_int32_t flags; // dynamic flags + u_int32_t attributes; // persistent flags + u_int32_t writeCount; + u_int32_t lastfsync; /* Last time that this was fsynced */ + + GetBlockProcPtr getBlockProc; + ReleaseBlockProcPtr releaseBlockProc; + SetEndOfForkProcPtr setEndOfForkProc; + + // statistical information + u_int32_t numGetNodes; + u_int32_t numGetNewNodes; + u_int32_t numReleaseNodes; + u_int32_t numUpdateNodes; + u_int32_t numMapNodesRead; // map nodes beyond header node + u_int32_t numHintChecks; + u_int32_t numPossibleHints; // Looks like a formated hint + u_int32_t numValidHints; // Hint used to find correct record. + u_int32_t reservedNodes; + BTreeIterator iterator; // useable when holding exclusive b-tree lock + +#if DEBUG + void *madeDirtyBy[2]; +#endif +} BTreeControlBlock, *BTreeControlBlockPtr; + +u_int32_t CalcKeySize(const BTreeControlBlock *btcb, const BTreeKey *key); +#define CalcKeySize(btcb, key) ( ((btcb)->attributes & kBTBigKeysMask) ? ((key)->length16 + 2) : ((key)->length8 + 1) ) + +u_int32_t KeyLength(const BTreeControlBlock *btcb, const BTreeKey *key); +#define KeyLength(btcb, key) ( ((btcb)->attributes & kBTBigKeysMask) ? (key)->length16 : (key)->length8 ) + + + +typedef enum { + kBTHeaderDirty = 0x00000001 +} BTreeFlags; + +static inline void M_BTreeHeaderDirty(BTreeControlBlock *bt) { +#if DEBUG + bt->madeDirtyBy[0] = __builtin_return_address(0); + bt->madeDirtyBy[1] = __builtin_return_address(1); +#endif + bt->flags |= kBTHeaderDirty; +} + +typedef int8_t *NodeBuffer; +typedef BlockDescriptor NodeRec, *NodePtr; //€€ remove this someday... 
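+
+// [Editorial illustration added in this excerpt -- not part of the original
+// change.]  CalcKeySize and KeyLength above encode the HFS/HFS+ difference in
+// the key length field: trees with kBTBigKeysMask set (HFS+) store a 16-bit
+// length, others (HFS) an 8-bit length, and CalcKeySize also counts the length
+// field itself (+2 or +1).  Below is a stand-alone sketch of the same logic,
+// using local stand-ins for BTreeKey and kBTBigKeysMask (its value here, 0x2,
+// is assumed from hfs_format.h).
+#if 0 /* illustrative sketch only */
+#include <stdint.h>
+#include <stdio.h>
+
+union mini_key {
+	uint8_t		length8;	/* HFS:  8-bit key length  */
+	uint16_t	length16;	/* HFS+: 16-bit key length */
+};
+
+#define MINI_BIG_KEYS	0x00000002	/* stand-in for kBTBigKeysMask */
+
+static uint32_t mini_calc_key_size(uint32_t attributes, const union mini_key *key)
+{
+	/* length field plus key bytes, mirroring the CalcKeySize macro */
+	return (attributes & MINI_BIG_KEYS) ? (uint32_t)key->length16 + 2
+	                                    : (uint32_t)key->length8  + 1;
+}
+
+int main(void)
+{
+	union mini_key k;
+
+	k.length16 = 6;		/* HFS+ style key */
+	printf("%u\n", (unsigned)mini_calc_key_size(MINI_BIG_KEYS, &k));	/* 8 = 6 + 2 */
+
+	k.length8 = 6;		/* HFS style key  */
+	printf("%u\n", (unsigned)mini_calc_key_size(0, &k));			/* 7 = 6 + 1 */
+	return 0;
+}
+#endif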
+ + + + +//// Tree Path Table - constructed by SearchTree, used by InsertTree and DeleteTree + +typedef struct { + u_int32_t node; // node number + u_int16_t index; + u_int16_t reserved; // align size to a power of 2 +} TreePathRecord, *TreePathRecordPtr; + +typedef TreePathRecord TreePathTable [kMaxTreeDepth]; + + +//// InsertKey - used by InsertTree, InsertLevel and InsertNode + +struct InsertKey { + BTreeKeyPtr keyPtr; + u_int8_t * recPtr; + u_int16_t keyLength; + u_int16_t recSize; + Boolean replacingKey; + Boolean skipRotate; +}; + +typedef struct InsertKey InsertKey; + + +//// For Notational Convenience + +typedef BTNodeDescriptor* NodeDescPtr; +typedef u_int8_t *RecordPtr; +typedef BTreeKeyPtr KeyPtr; + + +//////////////////////////////////// Globals //////////////////////////////////// + + +//////////////////////////////////// Macros ///////////////////////////////////// + +#if DEBUG + #define Panic( message ) DebugStr( message ) + #define PanicIf( condition, message ) do { if ( (condition) != 0 ) DebugStr( message ); } while(0) +#else + #define Panic( message ) do { } while(0) + #define PanicIf( condition, message ) do { } while(0) +#endif + +// Exit function on error +#define M_ExitOnError( result ) do { if ( ( result ) != noErr ) goto ErrorExit; } while(0) + +// Test for passed condition and return if true +#define M_ReturnErrorIf( condition, error ) do { if ( condition ) return( error ); } while(0) + +//////////////////////////////// Key Operations ///////////////////////////////// + +int32_t CompareKeys (BTreeControlBlockPtr btreePtr, + KeyPtr searchKey, + KeyPtr trialKey ); + +//////////////////////////////// Map Operations ///////////////////////////////// + +OSStatus AllocateNode (BTreeControlBlockPtr btreePtr, + u_int32_t *nodeNum); + +OSStatus FreeNode (BTreeControlBlockPtr btreePtr, + u_int32_t nodeNum); + +OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, + u_int32_t nodes ); + +u_int32_t CalcMapBits (BTreeControlBlockPtr btreePtr); + + +void BTUpdateReserve (BTreeControlBlockPtr btreePtr, + int nodes); + +//////////////////////////////// Misc Operations //////////////////////////////// + +u_int16_t CalcKeyRecordSize (u_int16_t keySize, + u_int16_t recSize ); + +OSStatus VerifyHeader (FCB *filePtr, + BTHeaderRec *header ); + +OSStatus UpdateHeader (BTreeControlBlockPtr btreePtr, + Boolean forceWrite ); + +OSStatus FindIteratorPosition (BTreeControlBlockPtr btreePtr, + BTreeIteratorPtr iterator, + BlockDescriptor *left, + BlockDescriptor *middle, + BlockDescriptor *right, + u_int32_t *nodeNum, + u_int16_t *index, + Boolean *foundRecord ); + +OSStatus CheckInsertParams (FCB *filePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen ); + +OSStatus TrySimpleReplace (BTreeControlBlockPtr btreePtr, + NodeDescPtr nodePtr, + BTreeIterator *iterator, + FSBufferDescriptor *record, + u_int16_t recordLen, + Boolean *recordInserted ); + +OSStatus IsItAHint (BTreeControlBlockPtr btreePtr, + BTreeIterator *iterator, + Boolean *answer ); + +extern OSStatus TreeIsDirty(BTreeControlBlockPtr btreePtr); + +//////////////////////////////// Node Operations //////////////////////////////// + +//// Node Operations + +OSStatus GetNode (BTreeControlBlockPtr btreePtr, + u_int32_t nodeNum, + u_int32_t flags, + NodeRec *returnNodePtr ); + +/* Flags for GetNode() */ +#define kGetNodeHint 0x1 /* If set, the node is being looked up using a hint */ + +OSStatus GetLeftSiblingNode (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + NodeRec *left ); + +#define 
GetLeftSiblingNode(btree,node,left) GetNode ((btree), ((NodeDescPtr)(node))->bLink, 0, (left)) + +OSStatus GetRightSiblingNode (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + NodeRec *right ); + +#define GetRightSiblingNode(btree,node,right) GetNode ((btree), ((NodeDescPtr)(node))->fLink, 0, (right)) + + +OSStatus GetNewNode (BTreeControlBlockPtr btreePtr, + u_int32_t nodeNum, + NodeRec *returnNodePtr ); + +OSStatus ReleaseNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr ); + +OSStatus TrashNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr ); + +OSStatus UpdateNode (BTreeControlBlockPtr btreePtr, + NodePtr nodePtr, + u_int32_t transactionID, + u_int32_t flags ); + +//// Node Buffer Operations + +void ClearNode (BTreeControlBlockPtr btreePtr, + NodeDescPtr node ); + +u_int16_t GetNodeDataSize (BTreeControlBlockPtr btreePtr, + NodeDescPtr node ); + +u_int16_t GetNodeFreeSize (BTreeControlBlockPtr btreePtr, + NodeDescPtr node ); + + +//// Record Operations + +Boolean InsertRecord (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + RecordPtr recPtr, + u_int16_t recSize ); + +Boolean InsertKeyRecord (BTreeControlBlockPtr btreePtr, + NodeDescPtr node, + u_int16_t index, + KeyPtr keyPtr, + u_int16_t keyLength, + RecordPtr recPtr, + u_int16_t recSize ); + +void DeleteRecord (BTreeControlBlockPtr btree, + NodeDescPtr node, + u_int16_t index ); + + +Boolean SearchNode (BTreeControlBlockPtr btree, + NodeDescPtr node, + KeyPtr searchKey, + u_int16_t *index ); + +OSStatus GetRecordByIndex (BTreeControlBlockPtr btree, + NodeDescPtr node, + u_int16_t index, + KeyPtr *keyPtr, + u_int8_t * *dataPtr, + u_int16_t *dataSize ); + +u_int8_t * GetRecordAddress (BTreeControlBlockPtr btree, + NodeDescPtr node, + u_int16_t index ); + +#define GetRecordAddress(btreePtr,node,index) ((u_int8_t *)(node) + (*(short *) ((u_int8_t *)(node) + (btreePtr)->nodeSize - ((index) << 1) - kOffsetSize))) + + +u_int16_t GetRecordSize (BTreeControlBlockPtr btree, + NodeDescPtr node, + u_int16_t index ); + +u_int32_t GetChildNodeNum (BTreeControlBlockPtr btreePtr, + NodeDescPtr nodePtr, + u_int16_t index ); + +void MoveRecordsLeft (u_int8_t * src, + u_int8_t * dst, + u_int16_t bytesToMove ); + +#define MoveRecordsLeft(src,dst,bytes) bcopy((src),(dst),(bytes)) + +void MoveRecordsRight (u_int8_t * src, + u_int8_t * dst, + u_int16_t bytesToMove ); + +#define MoveRecordsRight(src,dst,bytes) bcopy((src),(dst),(bytes)) + + +//////////////////////////////// Tree Operations //////////////////////////////// + +OSStatus SearchTree (BTreeControlBlockPtr btreePtr, + BTreeKeyPtr keyPtr, + TreePathTable treePathTable, + u_int32_t *nodeNum, + BlockDescriptor *nodePtr, + u_int16_t *index ); + +OSStatus InsertTree (BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + KeyPtr keyPtr, + u_int8_t * recPtr, + u_int16_t recSize, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level, + Boolean replacingKey, + u_int32_t *insertNode ); + +OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, + TreePathTable treePathTable, + BlockDescriptor *targetNode, + u_int16_t index, + u_int16_t level ); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif //__BTREESPRIVATE__ diff --git a/core/CatalogPrivate.h b/core/CatalogPrivate.h new file mode 100644 index 0000000..72abbfe --- /dev/null +++ b/core/CatalogPrivate.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2000-2005, 2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: CatalogPrivate.h + + Contains: Private Catalog Manager interfaces. + + Version: HFS Plus 1.0 + + Copyright: (c) 1997-1998 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Don Brady + + Other Contact: xxx put other contact here xxx + + Technology: xxx put technology here xxx + + Writers: + + (JL) Jim Luther + (msd) Mark Day + (DSH) Deric Horn + (djb) Don Brady + + Change History (most recent first): + 11/10/98 djb Remove obsolete PrepareInputName prototype; + 4/6/98 djb Added lock data stuctures and ReleaseCatalogIterator prototype; + 4/6/98 djb Removed CatalogDataCache since its no longer used. + 4/2/98 djb InvalidateCatalogNodeCache does nothing under MacOS X. + 3/31/98 djb Sync up with final HFSVolumes.h header file. + + 11/20/97 djb Radar #2002357. Fixing retry mechanism. + 11/17/97 djb PrepareInputName routine now returns an error. + 11/13/97 djb Radar #1683572. Move CatalogIterator to this file from + FileMgrInternal.i. Double size of short unicode name. + 10/31/97 JL #2000184 - Changed prototypes for CreateFileThreadID and + ExchangeFiles. + 10/17/97 msd In CatalogCacheGlobals, add room for a single UniStr255 so + catalog iterators can step over long Unicode names. + 10/17/97 djb Add ConvertInputNameToUnicode for Catalog Create/Rename. + 10/1/97 djb Change catalog iterator implementation. + 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name + collision + 6/24/97 djb Add LocateCatalogNodeByMangledName routine. 
+ 6/24/97 djb first checked in +*/ + +#ifndef __CATALOGPRIVATE__ +#define __CATALOGPRIVATE__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include "hfs_format.h" + +#include "FileMgrInternal.h" +#include "BTreesInternal.h" + +// +// Private Catalog Manager Routines (for use only by Catalog Manager, CatSearch and FileID Services) +// + + +extern OSErr LocateCatalogNodeByKey ( const ExtendedVCB *volume, u_int32_t hint, CatalogKey *keyPtr, + CatalogRecord *dataPtr, u_int32_t *newHint ); + +extern OSErr LocateCatalogRecord( const ExtendedVCB *volume, HFSCatalogNodeID folderID, const CatalogName *name, + u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint); + +extern OSErr LocateCatalogNodeWithRetry ( const ExtendedVCB *volume, HFSCatalogNodeID folderID, ConstStr31Param pascalName, + CatalogName *unicodeName, u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, + u_int32_t *newHint ); +extern OSErr FlushCatalog( ExtendedVCB *volume); + + +extern void ConvertInputNameToUnicode(ConstStr31Param name, TextEncoding encodingHint, + TextEncoding *actualEncoding, CatalogName *catalogName); + +extern void BuildCatalogKey( HFSCatalogNodeID parentID, const CatalogName *name, Boolean isHFSPlus, + CatalogKey *key); + +extern OSErr BuildCatalogKeyUTF8(ExtendedVCB *volume, HFSCatalogNodeID parentID, const unsigned char *name, + u_int32_t length, CatalogKey *key); + +extern void CopyCatalogName( const CatalogName *srcName, CatalogName *dstName, Boolean isHFSPLus); + +extern OSErr ResolveFileID( ExtendedVCB *vcb, HFSCatalogNodeID fileID, HFSCatalogNodeID *parentID, Str31 name ); + +#if 0 +extern OSErr CreateFileThreadID( FIDParam *filePB, WDCBRecPtr *wdcbPtr ); + +extern OSErr ExchangeFiles( FIDParam *filePB, WDCBRecPtr *wdcbPtr ); +#endif + +extern void UpdateCatalogName( ConstStr31Param srcName, Str31 destName ); + + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif //__CATALOGPRIVATE__ diff --git a/core/CatalogUtilities.c b/core/CatalogUtilities.c new file mode 100644 index 0000000..e56473d --- /dev/null +++ b/core/CatalogUtilities.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2000-2002, 2004-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +#include "FileMgrInternal.h" +#include "BTreesInternal.h" +#include "CatalogPrivate.h" +#include "HFSUnicodeWrappers.h" +#include "BTreesPrivate.h" +#include + +// +// Routine: LocateCatalogNodeByKey +// +// Function: Locates the catalog record for an existing folder or file +// CNode and returns the key and data records. +// + +OSErr +LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *keyPtr, + CatalogRecord *dataPtr, u_int32_t *newHint) +{ + OSErr result; + CatalogName *nodeName = NULL; + HFSCatalogNodeID threadParentID; + u_int16_t tempSize; + FSBufferDescriptor btRecord; + struct BTreeIterator *searchIterator; + FCB *fcb; + + searchIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + fcb = GetFileControlBlock(volume->catalogRefNum); + + btRecord.bufferAddress = dataPtr; + btRecord.itemCount = 1; + btRecord.itemSize = sizeof(CatalogRecord); + + searchIterator->hint.nodeNum = hint; + + bcopy(keyPtr, &searchIterator->key, sizeof(CatalogKey)); + + result = BTSearchRecord( fcb, searchIterator, &btRecord, &tempSize, searchIterator ); + + if (result == noErr) + { + *newHint = searchIterator->hint.nodeNum; + + BlockMoveData(&searchIterator->key, keyPtr, sizeof(CatalogKey)); + } + + if (result == btNotFound) { + result = cmNotFound; + } + + if (result) { + hfs_free(searchIterator, sizeof(*searchIterator)); + return result; + } + + // if we got a thread record, then go look up real record + switch ( dataPtr->recordType ) + { + +#if CONFIG_HFS_STD + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + threadParentID = dataPtr->hfsThread.parentID; + nodeName = (CatalogName *) &dataPtr->hfsThread.nodeName; + break; +#endif + + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + threadParentID = dataPtr->hfsPlusThread.parentID; + nodeName = (CatalogName *) &dataPtr->hfsPlusThread.nodeName; + break; + + default: + threadParentID = 0; + break; + } + + if ( threadParentID ) // found a thread + result = LocateCatalogRecord(volume, threadParentID, nodeName, kNoHint, keyPtr, dataPtr, newHint); + + hfs_free(searchIterator, sizeof(*searchIterator)); + return result; +} + + + +//******************************************************************************* +// Routine: LocateCatalogRecord +// +// Function: Locates the catalog record associated with folderID and name +// +//******************************************************************************* + +OSErr +LocateCatalogRecord(const ExtendedVCB *volume, HFSCatalogNodeID folderID, const CatalogName *name, + __unused u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint) +{ + OSErr result; + uint16_t tempSize; + FSBufferDescriptor btRecord; + struct BTreeIterator *searchIterator = NULL; + FCB *fcb; + BTreeControlBlock *btcb; + + searchIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + fcb = GetFileControlBlock(volume->catalogRefNum); + btcb = (BTreeControlBlock *)fcb->fcbBTCBPtr; + + btRecord.bufferAddress = dataPtr; + btRecord.itemCount = 1; + btRecord.itemSize = sizeof(CatalogRecord); + + BuildCatalogKey(folderID, name, (volume->vcbSigWord == kHFSPlusSigWord), (CatalogKey *)&searchIterator->key); + + result = BTSearchRecord(fcb, searchIterator, &btRecord, &tempSize, searchIterator); + if (result == noErr) { + *newHint = searchIterator->hint.nodeNum; + BlockMoveData(&searchIterator->key, keyPtr, CalcKeySize(btcb, &searchIterator->key)); + } + + 
hfs_free(searchIterator, sizeof(*searchIterator)); + return (result == btNotFound ? cmNotFound : result); +} + + + +/* + * Routine: BuildCatalogKey + * + * Function: Constructs a catalog key record (ckr) given the parent + * folder ID and CName. Works for both classic and extended + * HFS volumes. + * + */ + +void +BuildCatalogKey(HFSCatalogNodeID parentID, const CatalogName *cName, Boolean isHFSPlus, CatalogKey *key) +{ + if ( isHFSPlus ) + { + key->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength; // initial key length (4 + 2) + key->hfsPlus.parentID = parentID; // set parent ID + key->hfsPlus.nodeName.length = 0; // null CName length + if ( cName != NULL ) + { + CopyCatalogName(cName, (CatalogName *) &key->hfsPlus.nodeName, isHFSPlus); + key->hfsPlus.keyLength += sizeof(UniChar) * cName->ustr.length; // add CName size to key length + } + } +#if CONFIG_HFS_STD + else + { + key->hfs.keyLength = kHFSCatalogKeyMinimumLength; // initial key length (1 + 4 + 1) + key->hfs.reserved = 0; // clear unused byte + key->hfs.parentID = parentID; // set parent ID + key->hfs.nodeName[0] = 0; // null CName length + if ( cName != NULL ) + { + UpdateCatalogName(cName->pstr, key->hfs.nodeName); + key->hfs.keyLength += key->hfs.nodeName[0]; // add CName size to key length + } + } +#endif + +} + +OSErr +BuildCatalogKeyUTF8(ExtendedVCB *volume, HFSCatalogNodeID parentID, const unsigned char *name, u_int32_t nameLength, + CatalogKey *key) +{ + OSErr err = 0; + + if ( name == NULL) + nameLength = 0; + else if (nameLength == kUndefinedStrLen) + nameLength = strlen((const char *)name); + + if ( volume->vcbSigWord == kHFSPlusSigWord ) { + size_t unicodeBytes = 0; + + key->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength; // initial key length (4 + 2) + key->hfsPlus.parentID = parentID; // set parent ID + key->hfsPlus.nodeName.length = 0; // null CName length + if ( nameLength > 0 ) { + err = utf8_decodestr(name, nameLength, key->hfsPlus.nodeName.unicode, + &unicodeBytes, sizeof(key->hfsPlus.nodeName.unicode), ':', UTF_DECOMPOSED); + key->hfsPlus.nodeName.length = unicodeBytes / sizeof(UniChar); + key->hfsPlus.keyLength += unicodeBytes; + } + } +#if CONFIG_HFS_STD + else { + key->hfs.keyLength = kHFSCatalogKeyMinimumLength; // initial key length (1 + 4 + 1) + key->hfs.reserved = 0; // clear unused byte + key->hfs.parentID = parentID; // set parent ID + key->hfs.nodeName[0] = 0; // null CName length + if ( nameLength > 0 ) { + err = utf8_to_hfs(volume, nameLength, name, &key->hfs.nodeName[0]); + /* + * Retry with MacRoman in case that's how it was exported. + * When textEncoding != NULL we know that this is a create + * or rename call and can skip the retry (ugly but it works). + */ + if (err) + err = utf8_to_mac_roman(nameLength, name, &key->hfs.nodeName[0]); + key->hfs.keyLength += key->hfs.nodeName[0]; // add CName size to key length + } + } +#endif + + if (err) { + if (err == ENAMETOOLONG) + err = bdNamErr; /* name is too long */ + else + err = paramErr; /* name has invalid characters */ + } + + return err; +} + + +//******************************************************************************* +// Routine: FlushCatalog +// +// Function: Flushes the catalog for a specified volume. 
+// +//******************************************************************************* + +OSErr +FlushCatalog(ExtendedVCB *volume) +{ + FCB * fcb; + OSErr result; + struct hfsmount *hfsmp = VCBTOHFS (volume); + + fcb = GetFileControlBlock(volume->catalogRefNum); + result = BTFlushPath(fcb); + + if (result == noErr) + { + //--- check if catalog's fcb is dirty... + + if ( (0) /*fcb->fcbFlags & fcbModifiedMask*/ ) + { + hfs_lock_mount (hfsmp); + MarkVCBDirty(volume); // Mark the VCB dirty + volume->vcbLsMod = GetTimeUTC(); // update last modified date + hfs_unlock_mount (hfsmp); + + // result = FlushVolumeControlBlock(volume); + } + } + + return result; +} + + +//ÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑ +// Routine: UpdateCatalogName +// +// Function: Updates a CName. +// +//ÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑ + +void +UpdateCatalogName(ConstStr31Param srcName, Str31 destName) +{ + Size length = srcName[0]; + + if (length > CMMaxCName) + length = CMMaxCName; // truncate to max + + destName[0] = length; // set length byte + + BlockMoveData(&srcName[1], &destName[1], length); +} + +//_______________________________________________________________________ + +void +CopyCatalogName(const CatalogName *srcName, CatalogName *dstName, Boolean isHFSPlus) +{ + u_int32_t length = 0; + + if ( srcName == NULL ) + { + if ( dstName != NULL ) + dstName->ustr.length = 0; // set length byte to zero (works for both unicode and pascal) + return; + } + + if (isHFSPlus) { + length = sizeof(UniChar) * (srcName->ustr.length + 1); + } +#if CONFIG_HFS_STD + else { + length = sizeof(u_int8_t) + srcName->pstr[0]; + } +#endif + + if ( length > 1 ) + BlockMoveData(srcName, dstName, length); + else + dstName->ustr.length = 0; // set length byte to zero (works for both unicode and pascal) +} + diff --git a/core/FileExtentMapping.c b/core/FileExtentMapping.c new file mode 100644 index 0000000..1ea93f9 --- /dev/null +++ b/core/FileExtentMapping.c @@ -0,0 +1,2249 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#include "hfs.h" +#include "hfs_format.h" +#include "hfs_endian.h" + +#include "FileMgrInternal.h" +#include "BTreesInternal.h" + +#include + +/* +============================================================ +Public (Exported) Routines: +============================================================ + + ExtendFileC Allocate more space to a given file. + + CompareExtentKeys + Compare two extents file keys (a search key and a trial + key). Used by the BTree manager when searching for, + adding, or deleting keys in the extents file of an HFS + volume. + + CompareExtentKeysPlus + Compare two extents file keys (a search key and a trial + key). Used by the BTree manager when searching for, + adding, or deleting keys in the extents file of an HFS+ + volume. + + MapFileBlockC Convert (map) an offset within a given file into a + physical disk address. + + TruncateFileC Truncates the disk space allocated to a file. The file + space is truncated to a specified new physical EOF, rounded + up to the next allocation block boundry. There is an option + to truncate to the end of the extent containing the new EOF. + + FlushExtentFile + Flush the extents file for a given volume. + + SearchExtentFile + Search the FCB and extents file for an extent record that + contains a given file position (in bytes). + + +============================================================ +Internal Routines: +============================================================ + FindExtentRecord + Search the extents BTree for a particular extent record. + SearchExtentRecord + Search a given extent record to see if it contains a given + file position (in bytes). Used by SearchExtentFile. + ReleaseExtents + Deallocate all allocation blocks in all extents of an extent + data record. + TruncateExtents + Deallocate blocks and delete extent records for all allocation + blocks beyond a certain point in a file. The starting point + must be the first file allocation block for some extent record + for the file. + DeallocateFork + Deallocate all allocation blocks belonging to a given fork. + UpdateExtentRecord + If the extent record came from the extents file, write out + the updated record; otherwise, copy the updated record into + the FCB resident extent record. If the record has no extents, + and was in the extents file, then delete the record instead. 
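    For orientation, the self-contained sketch below models the extent-record
    walk that SearchExtentRecord and MapFileBlockC rely on.  The sketch_ names
    and the two-field struct are illustrative only; the real on-disk layouts
    (HFSPlusExtentDescriptor / HFSPlusExtentRecord) are declared in hfs_format.h.

        #include <stdbool.h>
        #include <stdint.h>

        #define kSketchExtentDensity 8          // mirrors kHFSPlusExtentDensity

        struct sketch_extent { uint32_t startBlock, blockCount; };

        // Map a file allocation block number (FABN) to a volume allocation
        // block using one extent record.  recordStartFABN is the FABN at
        // which the record's first descriptor begins.
        bool sketch_map_fabn(const struct sketch_extent rec[kSketchExtentDensity],
                             uint32_t recordStartFABN, uint32_t fabn,
                             uint32_t *volBlock)
        {
            uint32_t next = recordStartFABN;
            for (int i = 0; i < kSketchExtentDensity && rec[i].blockCount != 0; ++i) {
                if (fabn < next + rec[i].blockCount) {
                    *volBlock = rec[i].startBlock + (fabn - next);
                    return true;
                }
                next += rec[i].blockCount;
            }
            return false;   // not mapped here; the next overflow record continues the fork
        }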
+*/ + +#if CONFIG_HFS_STD +static const int64_t kTwoGigabytes = 0x80000000LL; +#endif + +enum +{ + kDataForkType = 0, + kResourceForkType = 0xFF, + + kPreviousRecord = -1 +}; + + +#if CONFIG_HFS_STD +static OSErr HFSPlusToHFSExtents( + const HFSPlusExtentRecord oldExtents, + HFSExtentRecord newExtents); +#endif + +static OSErr FindExtentRecord( + const ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock, + Boolean allowPrevious, + HFSPlusExtentKey *foundKey, + HFSPlusExtentRecord foundData, + u_int32_t *foundHint); + +static OSErr DeleteExtentRecord( + const ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock); + +static OSErr CreateExtentRecord( + ExtendedVCB *vcb, + HFSPlusExtentKey *key, + HFSPlusExtentRecord extents, + u_int32_t *hint); + + +static OSErr GetFCBExtentRecord( + const FCB *fcb, + HFSPlusExtentRecord extents); + +static OSErr SearchExtentRecord( + ExtendedVCB *vcb, + u_int32_t searchFABN, + const HFSPlusExtentRecord extentData, + u_int32_t extentDataStartFABN, + u_int32_t *foundExtentDataOffset, + u_int32_t *endingFABNPlusOne, + Boolean *noMoreExtents); + +static OSErr ReleaseExtents( + ExtendedVCB *vcb, + const HFSPlusExtentRecord extentRecord, + u_int32_t *numReleasedAllocationBlocks, + Boolean *releasedLastExtent); + +static OSErr DeallocateFork( + ExtendedVCB *vcb, + HFSCatalogNodeID fileID, + u_int8_t forkType, + HFSPlusExtentRecord catalogExtents, + Boolean * recordDeleted); + +static OSErr TruncateExtents( + ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock, + Boolean * recordDeleted); + +static OSErr UpdateExtentRecord ( + ExtendedVCB *vcb, + FCB *fcb, + int deleted, + const HFSPlusExtentKey *extentFileKey, + const HFSPlusExtentRecord extentData, + u_int32_t extentBTreeHint); + +static Boolean ExtentsAreIntegral( + const HFSPlusExtentRecord extentRecord, + u_int32_t mask, + u_int32_t *blocksChecked, + Boolean *checkedLastExtent); + +//_________________________________________________________________________________ +// +// Routine: FindExtentRecord +// +// Purpose: Search the extents BTree for an extent record matching the given +// FileID, fork, and starting file allocation block number. +// +// Inputs: +// vcb Volume to search +// forkType 0 = data fork, -1 = resource fork +// fileID File's FileID (CatalogNodeID) +// startBlock Starting file allocation block number +// allowPrevious If the desired record isn't found and this flag is set, +// then see if the previous record belongs to the same fork. +// If so, then return it. +// +// Outputs: +// foundKey The key data for the record actually found +// foundData The extent record actually found (NOTE: on an HFS volume, the +// fourth entry will be zeroes. 
+// foundHint The BTree hint to find the node again +//_________________________________________________________________________________ +static OSErr FindExtentRecord( + const ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock, + Boolean allowPrevious, + HFSPlusExtentKey *foundKey, + HFSPlusExtentRecord foundData, + u_int32_t *foundHint) +{ + FCB * fcb; + struct BTreeIterator *btIterator = NULL; + FSBufferDescriptor btRecord; + OSErr err; + u_int16_t btRecordSize; + + err = noErr; + if (foundHint) + *foundHint = 0; + fcb = GetFileControlBlock(vcb->extentsRefNum); + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* HFS Plus / HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + HFSPlusExtentKey * extentKeyPtr; + HFSPlusExtentRecord extentData; + + extentKeyPtr = (HFSPlusExtentKey*) &btIterator->key; + extentKeyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; + extentKeyPtr->forkType = forkType; + extentKeyPtr->pad = 0; + extentKeyPtr->fileID = fileID; + extentKeyPtr->startBlock = startBlock; + + btRecord.bufferAddress = &extentData; + btRecord.itemSize = sizeof(HFSPlusExtentRecord); + btRecord.itemCount = 1; + + err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + + if (err == btNotFound && allowPrevious) { + err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize); + + // A previous record may not exist, so just return btNotFound (like we would if + // it was for the wrong file/fork). + if (err == (OSErr) fsBTStartOfIterationErr) //¥¥ fsBTStartOfIterationErr is type unsigned long + err = btNotFound; + + if (err == noErr) { + // Found a previous record. Does it belong to the same fork of the same file? + if (extentKeyPtr->fileID != fileID || extentKeyPtr->forkType != forkType) + err = btNotFound; + } + } + + if (err == noErr) { + // Copy the found key back for the caller + if (foundKey) + BlockMoveData(extentKeyPtr, foundKey, sizeof(HFSPlusExtentKey)); + // Copy the found data back for the caller + BlockMoveData(&extentData, foundData, sizeof(HFSPlusExtentRecord)); + } + } +#if CONFIG_HFS_STD + else { + HFSExtentKey * extentKeyPtr; + HFSExtentRecord extentData; + + extentKeyPtr = (HFSExtentKey*) &btIterator->key; + extentKeyPtr->keyLength = kHFSExtentKeyMaximumLength; + extentKeyPtr->forkType = forkType; + extentKeyPtr->fileID = fileID; + extentKeyPtr->startBlock = startBlock; + + btRecord.bufferAddress = &extentData; + btRecord.itemSize = sizeof(HFSExtentRecord); + btRecord.itemCount = 1; + + err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + + if (err == btNotFound && allowPrevious) { + err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize); + + // A previous record may not exist, so just return btNotFound (like we would if + // it was for the wrong file/fork). + if (err == (OSErr) fsBTStartOfIterationErr) //¥¥ fsBTStartOfIterationErr is type unsigned long + err = btNotFound; + + if (err == noErr) { + // Found a previous record. Does it belong to the same fork of the same file? 
+ if (extentKeyPtr->fileID != fileID || extentKeyPtr->forkType != forkType) + err = btNotFound; + } + } + + if (err == noErr) { + u_int16_t i; + + // Copy the found key back for the caller + if (foundKey) { + foundKey->keyLength = kHFSPlusExtentKeyMaximumLength; + foundKey->forkType = extentKeyPtr->forkType; + foundKey->pad = 0; + foundKey->fileID = extentKeyPtr->fileID; + foundKey->startBlock = extentKeyPtr->startBlock; + } + // Copy the found data back for the caller + foundData[0].startBlock = extentData[0].startBlock; + foundData[0].blockCount = extentData[0].blockCount; + foundData[1].startBlock = extentData[1].startBlock; + foundData[1].blockCount = extentData[1].blockCount; + foundData[2].startBlock = extentData[2].startBlock; + foundData[2].blockCount = extentData[2].blockCount; + + for (i = 3; i < kHFSPlusExtentDensity; ++i) + { + foundData[i].startBlock = 0; + foundData[i].blockCount = 0; + } + } + } +#endif + + if (foundHint) + *foundHint = btIterator->hint.nodeNum; + + hfs_free(btIterator, sizeof(*btIterator)); + return err; +} + + + +static OSErr CreateExtentRecord( + ExtendedVCB *vcb, + HFSPlusExtentKey *key, + HFSPlusExtentRecord extents, + u_int32_t *hint) +{ + struct BTreeIterator *btIterator = NULL; + FSBufferDescriptor btRecord; + u_int16_t btRecordSize; + int lockflags; + OSErr err; + + err = noErr; + *hint = 0; + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* + * The lock taken by callers of ExtendFileC is speculative and + * only occurs when the file already has overflow extents. So + * We need to make sure we have the lock here. The extents + * btree lock can be nested (its recursive) so we always take + * it here. + */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + /* HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + btRecordSize = sizeof(HFSPlusExtentRecord); + btRecord.bufferAddress = extents; + btRecord.itemSize = btRecordSize; + btRecord.itemCount = 1; + + BlockMoveData(key, &btIterator->key, sizeof(HFSPlusExtentKey)); + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ + HFSExtentKey * keyPtr; + HFSExtentRecord data; + + btRecordSize = sizeof(HFSExtentRecord); + btRecord.bufferAddress = &data; + btRecord.itemSize = btRecordSize; + btRecord.itemCount = 1; + + keyPtr = (HFSExtentKey*) &btIterator->key; + keyPtr->keyLength = kHFSExtentKeyMaximumLength; + keyPtr->forkType = key->forkType; + keyPtr->fileID = key->fileID; + keyPtr->startBlock = key->startBlock; + + err = HFSPlusToHFSExtents(extents, data); + } +#endif + + if (err == noErr) + err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator, &btRecord, btRecordSize); + + if (err == noErr) + *hint = btIterator->hint.nodeNum; + + (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); + + hfs_systemfile_unlock(vcb, lockflags); + + hfs_free(btIterator, sizeof(*btIterator)); + return err; +} + + +static OSErr DeleteExtentRecord( + const ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock) +{ + struct BTreeIterator *btIterator = NULL; + OSErr err; + + err = noErr; + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* HFS+ / HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { // HFS Plus volume + HFSPlusExtentKey * keyPtr; + + keyPtr = (HFSPlusExtentKey*) &btIterator->key; + keyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; + keyPtr->forkType = forkType; + keyPtr->pad = 0; + keyPtr->fileID = fileID; + keyPtr->startBlock = startBlock; + } +#if CONFIG_HFS_STD + else { + /* HFS standard */ + HFSExtentKey 
* keyPtr; + + keyPtr = (HFSExtentKey*) &btIterator->key; + keyPtr->keyLength = kHFSExtentKeyMaximumLength; + keyPtr->forkType = forkType; + keyPtr->fileID = fileID; + keyPtr->startBlock = startBlock; + } +#endif + + err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator); + (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); + + + hfs_free(btIterator, sizeof(*btIterator)); + return err; +} + + + +//_________________________________________________________________________________ +// +// Routine: MapFileBlock +// +// Function: Maps a file position into a physical disk address. +// +//_________________________________________________________________________________ + +OSErr MapFileBlockC ( + ExtendedVCB *vcb, // volume that file resides on + FCB *fcb, // FCB of file + size_t numberOfBytes, // number of contiguous bytes desired + off_t offset, // starting offset within file (in bytes) + daddr64_t *startSector, // first sector (NOT an allocation block) + size_t *availableBytes) // number of contiguous bytes (up to numberOfBytes) +{ + OSErr err; + u_int32_t allocBlockSize; // Size of the volume's allocation block + u_int32_t sectorSize; + HFSPlusExtentKey foundKey; + HFSPlusExtentRecord foundData; + u_int32_t foundIndex; + u_int32_t hint; + u_int32_t firstFABN = 0; // file allocation block of first block in found extent + u_int32_t nextFABN; // file allocation block of block after end of found extent + off_t dataEnd; // (offset) end of range that is contiguous + u_int32_t sectorsPerBlock; // Number of sectors per allocation block + u_int32_t startBlock = 0; // volume allocation block corresponding to firstFABN + daddr64_t temp; + off_t tmpOff; + + allocBlockSize = vcb->blockSize; + sectorSize = VCBTOHFS(vcb)->hfs_logical_block_size; + + err = SearchExtentFile(vcb, fcb, offset, &foundKey, foundData, &foundIndex, &hint, &nextFABN); + if (err == noErr) { + startBlock = foundData[foundIndex].startBlock; + firstFABN = nextFABN - foundData[foundIndex].blockCount; + } + + if (err != noErr) + { + return err; + } + + // + // Determine the end of the available space. It will either be the end of the extent, + // or the file's PEOF, whichever is smaller. + // + dataEnd = (off_t)((off_t)(nextFABN) * (off_t)(allocBlockSize)); // Assume valid data through end of this extent + if (((off_t)fcb->ff_blocks * (off_t)allocBlockSize) < dataEnd) // Is PEOF shorter? + dataEnd = (off_t)fcb->ff_blocks * (off_t)allocBlockSize; // Yes, so only map up to PEOF + + // Compute the number of sectors in an allocation block + sectorsPerBlock = allocBlockSize / sectorSize; // sectors per allocation block + + // + // Compute the absolute sector number that contains the offset of the given file + // offset in sectors from start of the extent + + // offset in sectors from start of allocation block space + // + temp = (daddr64_t)((offset - (off_t)((off_t)(firstFABN) * (off_t)(allocBlockSize)))/sectorSize); + temp += (daddr64_t)startBlock * (daddr64_t)sectorsPerBlock; + + /* Add in any volume offsets */ + if (vcb->vcbSigWord == kHFSPlusSigWord) + temp += vcb->hfsPlusIOPosOffset / sectorSize; + else + temp += vcb->vcbAlBlSt; + + // Return the desired sector for file position "offset" + *startSector = temp; + + // + // Determine the number of contiguous bytes until the end of the extent + // (or the amount they asked for, whichever comes first). + // + if (availableBytes) + { + tmpOff = dataEnd - offset; + /* + * Disallow negative runs. 
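	 *
	 * Worked example of the mapping just computed (illustrative numbers, not
	 * from a real volume): with 4096-byte allocation blocks and 512-byte
	 * logical blocks (sectorsPerBlock = 8), an extent whose first file
	 * allocation block is firstFABN = 16 and whose volume start block is 100
	 * maps file offset 70000 to (70000 - 16*4096)/512 + 100*8 = 8 + 800 = 808
	 * logical blocks into the allocation space; the hfsPlusIOPosOffset (or
	 * vcbAlBlSt on HFS) adjustment above then makes that device-relative.
	 *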
+ */ + if (tmpOff <= 0) { + /* This shouldn't happen unless something is corrupt */ + hfs_corruption_debug("MapFileBlockC: tmpOff <= 0 (%lld)\n", tmpOff); + return EINVAL; + } + + if (tmpOff > (off_t)(numberOfBytes)) { + *availableBytes = numberOfBytes; // more there than they asked for, so pin the output + } + else { + *availableBytes = tmpOff; + } + } + + return noErr; +} + + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: ReleaseExtents +// +// Function: Release the extents of a single extent data record. +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +static OSErr ReleaseExtents( + ExtendedVCB *vcb, + const HFSPlusExtentRecord extentRecord, + u_int32_t *numReleasedAllocationBlocks, + Boolean *releasedLastExtent) +{ + u_int32_t extentIndex; + u_int32_t numberOfExtents; + OSErr err = noErr; + + *numReleasedAllocationBlocks = 0; + *releasedLastExtent = false; + + if (vcb->vcbSigWord == kHFSPlusSigWord) + numberOfExtents = kHFSPlusExtentDensity; + else + numberOfExtents = kHFSExtentDensity; + + for( extentIndex = 0; extentIndex < numberOfExtents; extentIndex++) + { + u_int32_t numAllocationBlocks; + + // Loop over the extent record and release the blocks associated with each extent. + + numAllocationBlocks = extentRecord[extentIndex].blockCount; + if ( numAllocationBlocks == 0 ) + { + *releasedLastExtent = true; + break; + } + + err = BlockDeallocate( vcb, extentRecord[extentIndex].startBlock, numAllocationBlocks , 0); + if ( err != noErr ) + break; + + *numReleasedAllocationBlocks += numAllocationBlocks; // bump FABN to beg of next extent + } + + return( err ); +} + + + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: TruncateExtents +// +// Purpose: Delete extent records whose starting file allocation block number +// is greater than or equal to a given starting block number. The +// allocation blocks represented by the extents are deallocated. +// +// Inputs: +// vcb Volume to operate on +// fileID Which file to operate on +// startBlock Starting file allocation block number for first extent +// record to delete. +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +static OSErr TruncateExtents( + ExtendedVCB *vcb, + u_int8_t forkType, + u_int32_t fileID, + u_int32_t startBlock, + Boolean * recordDeleted) +{ + OSErr err; + u_int32_t numberExtentsReleased; + Boolean releasedLastExtent; + u_int32_t hint; + HFSPlusExtentKey key; + HFSPlusExtentRecord extents; + int lockflags; + + /* + * The lock taken by callers of TruncateFileC is speculative and + * only occurs when the file already has overflow extents. So + * We need to make sure we have the lock here. The extents + * btree lock can be nested (its recursive) so we always take + * it here. 
+ */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + while (true) { + err = FindExtentRecord(vcb, forkType, fileID, startBlock, false, &key, extents, &hint); + if (err != noErr) { + if (err == btNotFound) + err = noErr; + break; + } + + err = ReleaseExtents( vcb, extents, &numberExtentsReleased, &releasedLastExtent ); + if (err != noErr) break; + + err = DeleteExtentRecord(vcb, forkType, fileID, startBlock); + if (err != noErr) break; + + *recordDeleted = true; + startBlock += numberExtentsReleased; + } + hfs_systemfile_unlock(vcb, lockflags); + + return err; +} + + + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: DeallocateFork +// +// Function: De-allocates all disk space allocated to a specified fork. +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +static OSErr DeallocateFork( + ExtendedVCB *vcb, + HFSCatalogNodeID fileID, + u_int8_t forkType, + HFSPlusExtentRecord catalogExtents, + Boolean * recordDeleted) /* true if a record was deleted */ +{ + OSErr err; + u_int32_t numReleasedAllocationBlocks; + Boolean releasedLastExtent; + + // Release the catalog extents + err = ReleaseExtents( vcb, catalogExtents, &numReleasedAllocationBlocks, &releasedLastExtent ); + // Release the extra extents, if present + if (err == noErr && !releasedLastExtent) + err = TruncateExtents(vcb, forkType, fileID, numReleasedAllocationBlocks, recordDeleted); + + return( err ); +} + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: FlushExtentFile +// +// Function: Flushes the extent file for a specified volume +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +OSErr FlushExtentFile( ExtendedVCB *vcb ) +{ + FCB * fcb; + OSErr err; + int lockflags; + + fcb = GetFileControlBlock(vcb->extentsRefNum); + + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + err = BTFlushPath(fcb); + hfs_systemfile_unlock(vcb, lockflags); + + if ( err == noErr ) + { + // If the FCB for the extent "file" is dirty, mark the VCB as dirty. + + if (FTOC(fcb)->c_flag & C_MODIFIED) + { + MarkVCBDirty( vcb ); + // err = FlushVolumeControlBlock( vcb ); + } + } + + return( err ); +} + + +#if CONFIG_HFS_STD +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: CompareExtentKeys +// +// Function: Compares two extent file keys (a search key and a trial key) for +// an HFS volume. 
+//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +int32_t CompareExtentKeys( const HFSExtentKey *searchKey, const HFSExtentKey *trialKey ) +{ + int32_t result; // ± 1 + + #if DEBUG + if (searchKey->keyLength != kHFSExtentKeyMaximumLength) + DebugStr("HFS: search Key is wrong length"); + if (trialKey->keyLength != kHFSExtentKeyMaximumLength) + DebugStr("HFS: trial Key is wrong length"); + #endif + + result = -1; // assume searchKey < trialKey + + if (searchKey->fileID == trialKey->fileID) { + // + // FileNum's are equal; compare fork types + // + if (searchKey->forkType == trialKey->forkType) { + // + // Fork types are equal; compare allocation block number + // + if (searchKey->startBlock == trialKey->startBlock) { + // + // Everything is equal + // + result = 0; + } + else { + // + // Allocation block numbers differ; determine sign + // + if (searchKey->startBlock > trialKey->startBlock) + result = 1; + } + } + else { + // + // Fork types differ; determine sign + // + if (searchKey->forkType > trialKey->forkType) + result = 1; + } + } + else { + // + // FileNums differ; determine sign + // + if (searchKey->fileID > trialKey->fileID) + result = 1; + } + + return( result ); +} +#endif + + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: CompareExtentKeysPlus +// +// Function: Compares two extent file keys (a search key and a trial key) for +// an HFS volume. +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +int32_t CompareExtentKeysPlus( const HFSPlusExtentKey *searchKey, const HFSPlusExtentKey *trialKey ) +{ + int32_t result; // ± 1 + + #if DEBUG + if (searchKey->keyLength != kHFSPlusExtentKeyMaximumLength) + DebugStr("HFS: search Key is wrong length"); + if (trialKey->keyLength != kHFSPlusExtentKeyMaximumLength) + DebugStr("HFS: trial Key is wrong length"); + #endif + + result = -1; // assume searchKey < trialKey + + if (searchKey->fileID == trialKey->fileID) { + // + // FileNum's are equal; compare fork types + // + if (searchKey->forkType == trialKey->forkType) { + // + // Fork types are equal; compare allocation block number + // + if (searchKey->startBlock == trialKey->startBlock) { + // + // Everything is equal + // + result = 0; + } + else { + // + // Allocation block numbers differ; determine sign + // + if (searchKey->startBlock > trialKey->startBlock) + result = 1; + } + } + else { + // + // Fork types differ; determine sign + // + if (searchKey->forkType > trialKey->forkType) + result = 1; + } + } + else { + // + // FileNums differ; determine sign + // + if (searchKey->fileID > trialKey->fileID) + result = 1; + } + + return( result ); +} + +static int +should_pin_blocks(hfsmount_t *hfsmp, FCB *fcb) +{ + if (!ISSET(hfsmp->hfs_flags, HFS_CS_HOTFILE_PIN) + || fcb->ff_cp == NULL || fcb->ff_cp->c_vp == NULL) { + return 0; + } + + int pin_blocks; + + // + // File system metadata should get pinned + // + if (vnode_issystem(fcb->ff_cp->c_vp)) { + return 1; + } + + // + // If a file is AutoCandidate, we should not pin its blocks because + // it was an automatically added file and this function is intended + // to pin new blocks being added to user-generated content. + // + if (fcb->ff_cp->c_attr.ca_recflags & kHFSAutoCandidateMask) { + return 0; + } + + // + // If a file is marked FastDevPinned it is an existing pinned file + // or a new file that should be pinned. 
+ // + // If a file is marked FastDevCandidate it is a new file that is + // being written to for the first time so we don't want to pin it + // just yet as it may not meet the criteria (i.e. too large). + // + if ((fcb->ff_cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask)) != 0) { + pin_blocks = 1; + } else { + pin_blocks = 0; + } + + return pin_blocks; +} + + + +static void +pin_blocks_if_needed(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount) +{ + if (!should_pin_blocks(vcb, fcb)) { + return; + } + + // ask CoreStorage to pin the new blocks being added to this file + if (hfs_pin_block_range((struct hfsmount *)vcb, HFS_PIN_IT, startBlock, blockCount) == 0) { + struct vnode *vp = fcb->ff_cp->c_vp; + + // and make sure to keep our accounting in order + hfs_hotfile_adjust_blocks(vp, -blockCount); + } +} + + + +/* + * Add a file extent to a file. + * + * Used by hfs_extendfs to extend the volume allocation bitmap file. + * + */ +int +AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount) +{ + HFSPlusExtentKey foundKey; + HFSPlusExtentRecord foundData; + u_int32_t foundIndex; + u_int32_t hint; + u_int32_t nextBlock; + int64_t peof; + int i; + int error; + + peof = (int64_t)(fcb->ff_blocks + blockCount) * (int64_t)vcb->blockSize; + + error = SearchExtentFile(vcb, fcb, peof-1, &foundKey, foundData, &foundIndex, &hint, &nextBlock); + if (error != fxRangeErr) + return (EBUSY); + + /* + * Add new extent. See if there is room in the current record. + */ + if (foundData[foundIndex].blockCount != 0) + ++foundIndex; + if (foundIndex == kHFSPlusExtentDensity) { + /* + * Existing record is full so create a new one. + */ + foundKey.keyLength = kHFSPlusExtentKeyMaximumLength; + foundKey.forkType = kDataForkType; + foundKey.pad = 0; + foundKey.fileID = FTOC(fcb)->c_fileid; + foundKey.startBlock = nextBlock; + + foundData[0].startBlock = startBlock; + foundData[0].blockCount = blockCount; + + /* zero out remaining extents. */ + for (i = 1; i < kHFSPlusExtentDensity; ++i) { + foundData[i].startBlock = 0; + foundData[i].blockCount = 0; + } + + foundIndex = 0; + + error = CreateExtentRecord(vcb, &foundKey, foundData, &hint); + if (error == fxOvFlErr) { + error = dskFulErr; + } else if (error == 0) { + pin_blocks_if_needed(vcb, fcb, startBlock, blockCount); + } + + } else { + /* + * Add a new extent into existing record. + */ + foundData[foundIndex].startBlock = startBlock; + foundData[foundIndex].blockCount = blockCount; + error = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint); + if (error == 0) { + pin_blocks_if_needed(vcb, fcb, startBlock, blockCount); + } + } + (void) FlushExtentFile(vcb); + + return (error); +} + + +//_________________________________________________________________________________ +// +// Routine: Extendfile +// +// Function: Extends the disk space allocated to a file. 
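/*
 * Broadly, the loop below rounds the request up to whole allocation blocks
 * and then walks a retry ladder: a contiguous allocation of the whole
 * remaining amount first, then piecewise allocations, then the metadata
 * zone, and finally journal-flush assisted allocation (HFS_ALLOC_FLUSHTXN).
 * A minimal model of the rounding, assuming only that howmany() rounds up
 * as defined in <sys/param.h>:
 *
 *     int64_t blocksToAdd = howmany(bytesToAdd, volumeBlockSize); // blocks needed
 *     int64_t rounded = blocksToAdd * (int64_t)volumeBlockSize;   // bytes reserved
 *     // e.g. bytesToAdd = 10000 with 4096-byte blocks -> 3 blocks, 12288 bytes
 */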
+// +//_________________________________________________________________________________ + +OSErr ExtendFileC ( + ExtendedVCB *vcb, // volume that file resides on + FCB *fcb, // FCB of file to truncate + int64_t bytesToAdd, // number of bytes to allocate + u_int32_t blockHint, // desired starting allocation block + u_int32_t flags, // EFContig and/or EFAll + int64_t *actualBytesAdded) // number of bytes actually allocated +{ + OSErr err; + u_int32_t volumeBlockSize; + int64_t blocksToAdd; + int64_t bytesThisExtent; + HFSPlusExtentKey foundKey; + HFSPlusExtentRecord foundData; + u_int32_t foundIndex; + u_int32_t hint; + u_int32_t nextBlock; + u_int32_t startBlock; + Boolean allOrNothing; + Boolean forceContig; + Boolean wantContig; + Boolean useMetaZone; + Boolean needsFlush; + int allowFlushTxns; + u_int32_t actualStartBlock; + u_int32_t actualNumBlocks; + u_int32_t numExtentsPerRecord; + int64_t maximumBytes; + int64_t availbytes; + int64_t peof; + u_int32_t prevblocks; + uint32_t fastdev = 0; + + struct hfsmount *hfsmp = (struct hfsmount*)vcb; + allowFlushTxns = 0; + needsFlush = false; + *actualBytesAdded = 0; + volumeBlockSize = vcb->blockSize; + allOrNothing = ((flags & kEFAllMask) != 0); + forceContig = ((flags & kEFContigMask) != 0); + prevblocks = fcb->ff_blocks; + + if (vcb->vcbSigWord != kHFSSigWord) { + numExtentsPerRecord = kHFSPlusExtentDensity; + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ + numExtentsPerRecord = kHFSExtentDensity; + + /* Make sure the request and new PEOF are less than 2GB if HFS std*/ + if (bytesToAdd >= kTwoGigabytes) + goto HFS_Std_Overflow; + if ((((int64_t)fcb->ff_blocks * (int64_t)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes) + goto HFS_Std_Overflow; + } +#endif + + // + // Determine how many blocks need to be allocated. + // Round up the number of desired bytes to add. + // + blocksToAdd = howmany(bytesToAdd, volumeBlockSize); + bytesToAdd = (int64_t)((int64_t)blocksToAdd * (int64_t)volumeBlockSize); + + /* + * For deferred allocations just reserve the blocks. + */ + if ((flags & kEFDeferMask) + && (vcb->vcbSigWord == kHFSPlusSigWord) + && (bytesToAdd < (int64_t)HFS_MAX_DEFERED_ALLOC) + && (blocksToAdd < hfs_freeblks(VCBTOHFS(vcb), 1))) { + hfs_lock_mount (hfsmp); + vcb->loanedBlocks += blocksToAdd; + hfs_unlock_mount(hfsmp); + + fcb->ff_unallocblocks += blocksToAdd; + FTOC(fcb)->c_blocks += blocksToAdd; + fcb->ff_blocks += blocksToAdd; + + /* + * We haven't touched the disk here; no blocks have been + * allocated and the volume will not be inconsistent if we + * don't update the catalog record immediately. + */ + FTOC(fcb)->c_flag |= C_MINOR_MOD; + *actualBytesAdded = bytesToAdd; + return (0); + } + /* + * Give back any unallocated blocks before doing real allocations. + */ + if (fcb->ff_unallocblocks > 0) { + u_int32_t loanedBlocks; + + loanedBlocks = fcb->ff_unallocblocks; + blocksToAdd += loanedBlocks; + bytesToAdd = (int64_t)blocksToAdd * (int64_t)volumeBlockSize; + FTOC(fcb)->c_blocks -= loanedBlocks; + fcb->ff_blocks -= loanedBlocks; + fcb->ff_unallocblocks = 0; + + hfs_lock_mount(hfsmp); + vcb->loanedBlocks -= loanedBlocks; + hfs_unlock_mount(hfsmp); + } + + // + // If the file's clump size is larger than the allocation block size, + // then set the maximum number of bytes to the requested number of bytes + // rounded up to a multiple of the clump size. 
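	// Worked example (illustrative numbers): with a 64 KiB clump size and a
	// 20000-byte request, maximumBytes = howmany(20000, 65536) * 65536 = 65536,
	// so the allocator may be offered up to one full clump below while
	// bytesToAdd still records the minimum the caller actually needs.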
+ // + if ((vcb->vcbClpSiz > (int32_t)volumeBlockSize) + && (bytesToAdd < (int64_t)HFS_MAX_DEFERED_ALLOC) + && (flags & kEFNoClumpMask) == 0) { + maximumBytes = (int64_t)howmany(bytesToAdd, vcb->vcbClpSiz); + maximumBytes *= vcb->vcbClpSiz; + } else { + maximumBytes = bytesToAdd; + } + +#if CONFIG_HFS_STD + // + // Compute new physical EOF, rounded up to a multiple of a block. + // + if ( (vcb->vcbSigWord == kHFSSigWord) && // Too big? + ((((int64_t)fcb->ff_blocks * (int64_t)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes) ) { + if (allOrNothing) // Yes, must they have it all? + goto HFS_Std_Overflow; // Yes, can't have it + else { + --blocksToAdd; // No, give give 'em one block less + bytesToAdd -= volumeBlockSize; + } + } +#endif + + // + // If allocation is all-or-nothing, make sure there are + // enough free blocks on the volume (quick test). + // + if (allOrNothing && + (blocksToAdd > hfs_freeblks(VCBTOHFS(vcb), flags & kEFReserveMask))) { + err = dskFulErr; + goto ErrorExit; + } + + // + // See if there are already enough blocks allocated to the file. + // + peof = ((int64_t)fcb->ff_blocks * (int64_t)volumeBlockSize) + bytesToAdd; // potential new PEOF + err = SearchExtentFile(vcb, fcb, peof-1, &foundKey, foundData, &foundIndex, &hint, &nextBlock); + if (err == noErr) { + // Enough blocks are already allocated. Just update the FCB to reflect the new length. + fcb->ff_blocks = peof / volumeBlockSize; + FTOC(fcb)->c_blocks += (bytesToAdd / volumeBlockSize); + FTOC(fcb)->c_flag |= C_MODIFIED; + goto Exit; + } + if (err != fxRangeErr) // Any real error? + goto ErrorExit; // Yes, so exit immediately + + // + // Adjust the PEOF to the end of the last extent. + // + peof = (int64_t)((int64_t)nextBlock * (int64_t)volumeBlockSize); // currently allocated PEOF + bytesThisExtent = (int64_t)(nextBlock - fcb->ff_blocks) * (int64_t)volumeBlockSize; + if (bytesThisExtent != 0) { + fcb->ff_blocks = nextBlock; + FTOC(fcb)->c_blocks += (bytesThisExtent / volumeBlockSize); + FTOC(fcb)->c_flag |= C_MODIFIED; + bytesToAdd -= bytesThisExtent; + } + + // + // Allocate some more space. + // + // First try a contiguous allocation (of the whole amount). + // If that fails, get whatever we can. + // If forceContig, then take whatever we got + // else, keep getting bits and pieces (non-contig) + + /* + * Note that for sparse devices (like sparse bundle dmgs), we + * should only be aggressive with re-using once-allocated pieces + * if we're not dealing with system files. If we're trying to operate + * on behalf of a system file, we need the maximum contiguous amount + * possible. For non-system files we favor locality and fragmentation over + * contiguity as it can result in fewer blocks being needed from the underlying + * filesystem that the sparse image resides upon. + */ + err = noErr; + if ( (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) + && (fcb->ff_cp->c_fileid >= kHFSFirstUserCatalogNodeID) + && (flags & kEFMetadataMask) == 0) { + /* + * We want locality over contiguity so by default we set wantContig to + * false unless we hit one of the circumstances below. + */ + wantContig = false; + if (hfs_isrbtree_active(VCBTOHFS(vcb))) { + /* + * If the red-black tree is acive, we can always find a suitable contiguous + * chunk. So if the user specifically requests contiguous files, we should + * honor that no matter what kind of device it is. 
+ */ + if (forceContig) { + wantContig = true; + } + } + else { + /* + * If the red-black tree is not active, then only set wantContig to true + * if we have never done a contig scan on the device, which would populate + * the free extent cache. Note that the caller may explicitly unset the + * DID_CONTIG_SCAN bit in order to force us to vend a contiguous extent here + * if the caller wants to get a contiguous chunk. + */ + if ((vcb->hfs_flags & HFS_DID_CONTIG_SCAN) == 0) { + vcb->hfs_flags |= HFS_DID_CONTIG_SCAN; + wantContig = true; + } + } + } + else { + wantContig = true; + } + + if (should_pin_blocks(hfsmp, fcb)) + fastdev = HFS_ALLOC_FAST_DEV; + + useMetaZone = flags & kEFMetadataMask; + do { + if (blockHint != 0) + startBlock = blockHint; + else + startBlock = foundData[foundIndex].startBlock + foundData[foundIndex].blockCount; + + actualNumBlocks = 0; + actualStartBlock = 0; + + /* Find number of free blocks based on reserved block flag option */ + availbytes = (int64_t)hfs_freeblks(VCBTOHFS(vcb), flags & kEFReserveMask) * + (int64_t)volumeBlockSize; + if (availbytes <= 0) { + err = dskFulErr; + } else { + if (wantContig && (availbytes < bytesToAdd)) { + err = dskFulErr; + } + else { + uint32_t ba_flags = fastdev; + + if (wantContig) { + ba_flags |= HFS_ALLOC_FORCECONTIG; + } + if (useMetaZone) { + ba_flags |= HFS_ALLOC_METAZONE; + } + if (allowFlushTxns) { + ba_flags |= HFS_ALLOC_FLUSHTXN; + } + + err = BlockAllocate( + vcb, + startBlock, + howmany(MIN(bytesToAdd, availbytes), volumeBlockSize), + howmany(MIN(maximumBytes, availbytes), volumeBlockSize), + ba_flags, + &actualStartBlock, + &actualNumBlocks); + } + } + if (err == dskFulErr) { + if (forceContig) { + if (allowFlushTxns == 0) { + /* If we're forcing contiguity, re-try but allow plucking from recently freed regions */ + allowFlushTxns = 1; + wantContig = 1; + err = noErr; + continue; + } + else { + break; // AllocContig failed because not enough contiguous space + } + } + if (wantContig) { + // Couldn't get one big chunk, so get whatever we can. + err = noErr; + wantContig = false; + continue; + } + if (actualNumBlocks != 0) + err = noErr; + + if (useMetaZone == 0) { + /* Couldn't get anything so dip into metadat zone */ + err = noErr; + useMetaZone = 1; + continue; + } + + /* If we couldn't find what we needed without flushing the journal, then go ahead and do it now */ + if (allowFlushTxns == 0) { + allowFlushTxns = 1; + err = noErr; + continue; + } + + } + if (err == noErr) { + // Add the new extent to the existing extent record, or create a new one. + if ((actualStartBlock == startBlock) && (blockHint == 0)) { + // We grew the file's last extent, so just adjust the number of blocks. + foundData[foundIndex].blockCount += actualNumBlocks; + err = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint); + if (err != noErr) break; + } + else { + u_int16_t i; + + // Need to add a new extent. See if there is room in the current record. + if (foundData[foundIndex].blockCount != 0) // Is current extent free to use? + ++foundIndex; // No, so use the next one. + if (foundIndex == numExtentsPerRecord) { + // This record is full. Need to create a new one. + if (FTOC(fcb)->c_fileid == kHFSExtentsFileID) { + (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks, 0); + err = dskFulErr; // Oops. Can't extend extents file past first record. 
+ break; + } + + foundKey.keyLength = kHFSPlusExtentKeyMaximumLength; + if (FORK_IS_RSRC(fcb)) + foundKey.forkType = kResourceForkType; + else + foundKey.forkType = kDataForkType; + foundKey.pad = 0; + foundKey.fileID = FTOC(fcb)->c_fileid; + foundKey.startBlock = nextBlock; + + foundData[0].startBlock = actualStartBlock; + foundData[0].blockCount = actualNumBlocks; + + // zero out remaining extents... + for (i = 1; i < kHFSPlusExtentDensity; ++i) + { + foundData[i].startBlock = 0; + foundData[i].blockCount = 0; + } + + foundIndex = 0; + + err = CreateExtentRecord(vcb, &foundKey, foundData, &hint); + if (err == fxOvFlErr) { + // We couldn't create an extent record because extents B-tree + // couldn't grow. Dellocate the extent just allocated and + // return a disk full error. + (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks, 0); + err = dskFulErr; + } + if (err != noErr) break; + + needsFlush = true; // We need to update the B-tree header + } + else { + // Add a new extent into this record and update. + foundData[foundIndex].startBlock = actualStartBlock; + foundData[foundIndex].blockCount = actualNumBlocks; + err = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint); + if (err != noErr) break; + } + } + + // Figure out how many bytes were actually allocated. + // NOTE: BlockAllocate could have allocated more than we asked for. + // Don't set the PEOF beyond what our client asked for. + nextBlock += actualNumBlocks; + bytesThisExtent = (int64_t)((int64_t)actualNumBlocks * (int64_t)volumeBlockSize); + if (bytesThisExtent > bytesToAdd) { + bytesToAdd = 0; + } + else { + bytesToAdd -= bytesThisExtent; + maximumBytes -= bytesThisExtent; + } + fcb->ff_blocks += (bytesThisExtent / volumeBlockSize); + FTOC(fcb)->c_blocks += (bytesThisExtent / volumeBlockSize); + FTOC(fcb)->c_flag |= C_MODIFIED; + + // If contiguous allocation was requested, then we've already got one contiguous + // chunk. If we didn't get all we wanted, then adjust the error to disk full. + if (forceContig) { + if (bytesToAdd != 0) + err = dskFulErr; + break; // We've already got everything that's contiguous + } + } + } while (err == noErr && bytesToAdd); + +ErrorExit: +Exit: + if (VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) { + /* Keep the roving allocator out of the metadata zone. */ + if (vcb->nextAllocation >= VCBTOHFS(vcb)->hfs_metazone_start && + vcb->nextAllocation <= VCBTOHFS(vcb)->hfs_metazone_end) { + hfs_lock_mount (hfsmp); + HFS_UPDATE_NEXT_ALLOCATION(vcb, VCBTOHFS(vcb)->hfs_metazone_end + 1); + MarkVCBDirty(vcb); + hfs_unlock_mount(hfsmp); + } + } + if (prevblocks < fcb->ff_blocks) { + *actualBytesAdded = (int64_t)(fcb->ff_blocks - prevblocks) * (int64_t)volumeBlockSize; + } else { + *actualBytesAdded = 0; + } + + if (fastdev) { + hfs_hotfile_adjust_blocks(fcb->ff_cp->c_vp, + (int64_t)prevblocks - fcb->ff_blocks); + } + + if (needsFlush) + (void) FlushExtentFile(vcb); + + return err; + +#if CONFIG_HFS_STD +HFS_Std_Overflow: + err = fileBoundsErr; + goto ErrorExit; +#endif +} + + + +//_________________________________________________________________________________ +// +// Routine: TruncateFileC +// +// Function: Truncates the disk space allocated to a file. The file space is +// truncated to a specified new PEOF rounded up to the next allocation +// block boundry. If the 'TFTrunExt' option is specified, the file is +// truncated to the end of the extent containing the new PEOF. 
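/*
 * The new PEOF is first rounded up to whole allocation blocks:
 * nextBlock = howmany(peof, blockSize) is the number of blocks kept, and
 * file allocation blocks from nextBlock onward are released.  Worked
 * example (illustrative numbers): blockSize = 4096 and peof = 10000 give
 * nextBlock = 3 and a rounded PEOF of 12288 bytes; with truncateToExtent
 * set, the extent containing byte 9999 is kept whole rather than being
 * shortened to the rounded PEOF.
 */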
+// +//_________________________________________________________________________________ + +OSErr TruncateFileC ( + ExtendedVCB *vcb, // volume that file resides on + FCB *fcb, // FCB of file to truncate + int64_t peof, // new physical size for file + int deleted, // if nonzero, the file's catalog record has already been deleted. + int rsrc, // does this represent a resource fork or not? + uint32_t fileid, // the fileid of the file we're manipulating. + Boolean truncateToExtent) // if true, truncate to end of extent containing newPEOF + +{ + OSErr err; + u_int32_t nextBlock; // next file allocation block to consider + u_int32_t startBlock; // Physical (volume) allocation block number of start of a range + u_int32_t physNumBlocks; // Number of allocation blocks in file (according to PEOF) + u_int32_t numBlocks; + HFSPlusExtentKey key; // key for current extent record; key->keyLength == 0 if FCB's extent record + u_int32_t hint; // BTree hint corresponding to key + HFSPlusExtentRecord extentRecord; + u_int32_t extentIndex; + u_int32_t extentNextBlock; + u_int32_t numExtentsPerRecord; + int64_t temp64; + u_int8_t forkType; + Boolean extentChanged; // true if we actually changed an extent + Boolean recordDeleted; // true if an extent record got deleted + + recordDeleted = false; + + if (vcb->vcbSigWord == kHFSPlusSigWord) { + numExtentsPerRecord = kHFSPlusExtentDensity; + } + else { + numExtentsPerRecord = kHFSExtentDensity; + } + + if (rsrc) { + forkType = kResourceForkType; + } + else { + forkType = kDataForkType; + } + + temp64 = fcb->ff_blocks; + physNumBlocks = (u_int32_t)temp64; + + // + // Round newPEOF up to a multiple of the allocation block size. If new size is + // two gigabytes or more, then round down by one allocation block (??? really? + // shouldn't that be an error?). + // + nextBlock = howmany(peof, vcb->blockSize); // number of allocation blocks to remain in file + peof = (int64_t)((int64_t)nextBlock * (int64_t)vcb->blockSize); // number of bytes in those blocks + +#if CONFIG_HFS_STD + if ((vcb->vcbSigWord == kHFSSigWord) && (peof >= kTwoGigabytes)) { + #if DEBUG + DebugStr("HFS: Trying to truncate a file to 2GB or more"); + #endif + err = fileBoundsErr; + goto ErrorExit; + } +#endif + + // + // Update FCB's length + // + /* + * XXX Any errors could cause ff_blocks and c_blocks to get out of sync... + */ + numBlocks = peof / vcb->blockSize; + if (!deleted) { + FTOC(fcb)->c_blocks -= (fcb->ff_blocks - numBlocks); + } + fcb->ff_blocks = numBlocks; + + // this catalog entry is modified and *must* get forced + // to disk when hfs_update() is called + if (!deleted) { + /* + * If the file is already C_NOEXISTS, then the catalog record + * has been removed from disk already. We wouldn't need to force + * another update + */ + FTOC(fcb)->c_flag |= C_MODIFIED; + } + // + // If the new PEOF is 0, then truncateToExtent has no meaning (we should always deallocate + // all storage). + // + if (peof == 0) { + int i; + + // Deallocate all the extents for this fork + err = DeallocateFork(vcb, fileid, forkType, fcb->fcbExtents, &recordDeleted); + if (err != noErr) goto ErrorExit; // got some error, so return it + + // Update the catalog extent record (making sure it's zeroed out) + if (err == noErr) { + for (i=0; i < kHFSPlusExtentDensity; i++) { + fcb->fcbExtents[i].startBlock = 0; + fcb->fcbExtents[i].blockCount = 0; + } + } + goto Done; + } + + // + // Find the extent containing byte (peof-1). This is the last extent we'll keep. 
+ // (If truncateToExtent is true, we'll keep the whole extent; otherwise, we'll only + // keep up through peof). The search will tell us how many allocation blocks exist + // in the found extent plus all previous extents. + // + err = SearchExtentFile(vcb, fcb, peof-1, &key, extentRecord, &extentIndex, &hint, &extentNextBlock); + if (err != noErr) goto ErrorExit; + + extentChanged = false; // haven't changed the extent yet + + if (!truncateToExtent) { + // + // Shorten this extent. It may be the case that the entire extent gets + // freed here. + // + numBlocks = extentNextBlock - nextBlock; // How many blocks in this extent to free up + if (numBlocks != 0) { + // Compute first volume allocation block to free + startBlock = extentRecord[extentIndex].startBlock + extentRecord[extentIndex].blockCount - numBlocks; + // Free the blocks in bitmap + err = BlockDeallocate(vcb, startBlock, numBlocks, 0); + if (err != noErr) goto ErrorExit; + // Adjust length of this extent + extentRecord[extentIndex].blockCount -= numBlocks; + // If extent is empty, set start block to 0 + if (extentRecord[extentIndex].blockCount == 0) + extentRecord[extentIndex].startBlock = 0; + // Remember that we changed the extent record + extentChanged = true; + } + } + + // + // Now move to the next extent in the record, and set up the file allocation block number + // + nextBlock = extentNextBlock; // Next file allocation block to free + ++extentIndex; // Its index within the extent record + + // + // Release all following extents in this extent record. Update the record. + // + while (extentIndex < numExtentsPerRecord && extentRecord[extentIndex].blockCount != 0) { + numBlocks = extentRecord[extentIndex].blockCount; + // Deallocate this extent + err = BlockDeallocate(vcb, extentRecord[extentIndex].startBlock, numBlocks, 0); + if (err != noErr) goto ErrorExit; + // Update next file allocation block number + nextBlock += numBlocks; + // Zero out start and length of this extent to delete it from record + extentRecord[extentIndex].startBlock = 0; + extentRecord[extentIndex].blockCount = 0; + // Remember that we changed an extent + extentChanged = true; + // Move to next extent in record + ++extentIndex; + } + + // + // If any of the extents in the current record were changed, then update that + // record (in the FCB, or extents file). + // + if (extentChanged) { + err = UpdateExtentRecord(vcb, fcb, deleted, &key, extentRecord, hint); + if (err != noErr) goto ErrorExit; + } + + // + // If there are any following allocation blocks, then we need + // to seach for their extent records and delete those allocation + // blocks. + // + if (nextBlock < physNumBlocks) + err = TruncateExtents(vcb, forkType, fileid, nextBlock, &recordDeleted); + +Done: +ErrorExit: + if (recordDeleted) + (void) FlushExtentFile(vcb); + + return err; +} + + +/* + * HFS Plus only + * + */ +OSErr HeadTruncateFile ( + ExtendedVCB *vcb, + FCB *fcb, + u_int32_t headblks) +{ + HFSPlusExtentRecord extents; + HFSPlusExtentRecord tailExtents; + HFSCatalogNodeID fileID; + u_int8_t forkType; + u_int32_t blkcnt = 0; + u_int32_t startblk; + u_int32_t blksfreed; + int i, j; + int error = 0; + int lockflags; + + + if (vcb->vcbSigWord != kHFSPlusSigWord) + return (-1); + + forkType = FORK_IS_RSRC(fcb) ? 
kResourceForkType : kDataForkType; + fileID = FTOC(fcb)->c_fileid; + bzero(tailExtents, sizeof(tailExtents)); + + blksfreed = 0; + startblk = 0; + + /* + * Process catalog resident extents + */ + for (i = 0, j = 0; i < kHFSPlusExtentDensity; ++i) { + blkcnt = fcb->fcbExtents[i].blockCount; + if (blkcnt == 0) + break; /* end of extents */ + + if (blksfreed < headblks) { + error = BlockDeallocate(vcb, fcb->fcbExtents[i].startBlock, blkcnt, 0); + /* + * Any errors after the first BlockDeallocate + * must be ignored so we can put the file in + * a known state. + */ + if (error ) { + if (i == 0) + goto ErrorExit; /* uh oh */ + else { + error = 0; + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", + FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); + } + } + + blksfreed += blkcnt; + fcb->fcbExtents[i].startBlock = 0; + fcb->fcbExtents[i].blockCount = 0; + } else { + tailExtents[j].startBlock = fcb->fcbExtents[i].startBlock; + tailExtents[j].blockCount = blkcnt; + ++j; + } + startblk += blkcnt; + } + + if (blkcnt == 0) + goto CopyExtents; + + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + /* + * Process overflow extents + */ + for (;;) { + u_int32_t extblks; + + error = FindExtentRecord(vcb, forkType, fileID, startblk, false, NULL, extents, NULL); + if (error) { + /* + * Any errors after the first BlockDeallocate + * must be ignored so we can put the file in + * a known state. + */ + if (error != btNotFound) + printf("hfs: HeadTruncateFile: problems finding extents %s (%d)\n", + FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); + error = 0; + break; + } + + for(i = 0, extblks = 0; i < kHFSPlusExtentDensity; ++i) { + blkcnt = extents[i].blockCount; + if (blkcnt == 0) + break; /* end of extents */ + + if (blksfreed < headblks) { + error = BlockDeallocate(vcb, extents[i].startBlock, blkcnt, 0); + if (error) { + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", + FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); + error = 0; + } + blksfreed += blkcnt; + } else { + tailExtents[j].startBlock = extents[i].startBlock; + tailExtents[j].blockCount = blkcnt; + ++j; + } + extblks += blkcnt; + } + + error = DeleteExtentRecord(vcb, forkType, fileID, startblk); + if (error) { + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", + FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); + error = 0; + } + + if (blkcnt == 0) + break; /* all done */ + + startblk += extblks; + } + hfs_systemfile_unlock(vcb, lockflags); + +CopyExtents: + if (blksfreed) { + bcopy(tailExtents, fcb->fcbExtents, sizeof(tailExtents)); + blkcnt = fcb->ff_blocks - headblks; + FTOC(fcb)->c_blocks -= headblks; + fcb->ff_blocks = blkcnt; + + FTOC(fcb)->c_flag |= C_MODIFIED; + FTOC(fcb)->c_touch_chgtime = TRUE; + + (void) FlushExtentFile(vcb); + } + +ErrorExit: + return MacToVFSError(error); +} + + + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: SearchExtentRecord (was XRSearch) +// +// Function: Searches extent record for the extent mapping a given file +// allocation block number (FABN). 
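+
+/*
+ * Sketch of the compaction HeadTruncateFile performs above: extents covering
+ * the leading headblks blocks are freed and the surviving tail extents are
+ * slid to the front of the descriptor array.  The struct, the helper and the
+ * free_run callback are simplified stand-ins for the kernel types and for
+ * BlockDeallocate, and overflow extent records are ignored here.
+ */
+#include <stdint.h>
+
+struct hdesc { uint32_t startBlock; uint32_t blockCount; };
+
+static void head_truncate(struct hdesc ext[], int density, uint32_t headblks,
+			  void (*free_run)(uint32_t start, uint32_t count))
+{
+	uint32_t freed = 0;
+	int i, j = 0;
+
+	for (i = 0; i < density && ext[i].blockCount != 0; ++i) {
+		if (freed < headblks) {			/* still inside the head being removed */
+			free_run(ext[i].startBlock, ext[i].blockCount);
+			freed += ext[i].blockCount;
+		} else {				/* tail extent: keep it, compacted forward */
+			ext[j++] = ext[i];
+		}
+	}
+	for (; j < density; ++j) {			/* zero the now-unused slots */
+		ext[j].startBlock = 0;
+		ext[j].blockCount = 0;
+	}
+}
+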
+// +// Input: searchFABN - desired FABN +// extentData - pointer to extent data record (xdr) +// extentDataStartFABN - beginning FABN for extent record +// +// Output: foundExtentDataOffset - offset to extent entry within xdr +// result = noErr, offset to extent mapping desired FABN +// result = FXRangeErr, offset to last extent in record +// endingFABNPlusOne - ending FABN +1 +// noMoreExtents - True if the extent was not found, and the +// extent record was not full (so don't bother +// looking in subsequent records); false otherwise. +// +// Result: noErr = ok +// FXRangeErr = desired FABN > last mapped FABN in record +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +static OSErr SearchExtentRecord( + ExtendedVCB *vcb, + u_int32_t searchFABN, + const HFSPlusExtentRecord extentData, + u_int32_t extentDataStartFABN, + u_int32_t *foundExtentIndex, + u_int32_t *endingFABNPlusOne, + Boolean *noMoreExtents) +{ + OSErr err = noErr; + u_int32_t extentIndex; + /* Set it to the HFS std value */ + u_int32_t numberOfExtents = kHFSExtentDensity; + u_int32_t numAllocationBlocks; + Boolean foundExtent; + + *endingFABNPlusOne = extentDataStartFABN; + *noMoreExtents = false; + foundExtent = false; + + /* Override numberOfExtents for HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + numberOfExtents = kHFSPlusExtentDensity; + } + + for( extentIndex = 0; extentIndex < numberOfExtents; ++extentIndex ) + { + + // Loop over the extent record and find the search FABN. + + numAllocationBlocks = extentData[extentIndex].blockCount; + if ( numAllocationBlocks == 0 ) + { + break; + } + + *endingFABNPlusOne += numAllocationBlocks; + + if( searchFABN < *endingFABNPlusOne ) + { + // Found the extent. + foundExtent = true; + break; + } + } + + if( foundExtent ) + { + // Found the extent. Note the extent offset + *foundExtentIndex = extentIndex; + } + else + { + // Did not find the extent. Set foundExtentDataOffset accordingly + if( extentIndex > 0 ) + { + *foundExtentIndex = extentIndex - 1; + } + else + { + *foundExtentIndex = 0; + } + + // If we found an empty extent, then set noMoreExtents. + if (extentIndex < numberOfExtents) + *noMoreExtents = true; + + // Finally, return an error to the caller + err = fxRangeErr; + } + + return( err ); +} + +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +// Routine: SearchExtentFile (was XFSearch) +// +// Function: Searches extent file (including the FCB resident extent record) +// for the extent mapping a given file position. +// +// Input: vcb - VCB pointer +// fcb - FCB pointer +// filePosition - file position (byte address) +// +// Output: foundExtentKey - extent key record (xkr) +// If extent was found in the FCB's resident extent record, +// then foundExtentKey->keyLength will be set to 0. 
+// foundExtentData - extent data record(xdr) +// foundExtentIndex - index to extent entry in xdr +// result = 0, offset to extent mapping desired FABN +// result = FXRangeErr, offset to last extent in record +// (i.e., kNumExtentsPerRecord-1) +// extentBTreeHint - BTree hint for extent record +// kNoHint = Resident extent record +// endingFABNPlusOne - ending FABN +1 +// +// Result: +// noErr Found an extent that contains the given file position +// FXRangeErr Given position is beyond the last allocated extent +// (other) (some other internal I/O error) +//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ + +OSErr SearchExtentFile( + ExtendedVCB *vcb, + const FCB *fcb, + int64_t filePosition, + HFSPlusExtentKey *foundExtentKey, + HFSPlusExtentRecord foundExtentData, + u_int32_t *foundExtentIndex, + u_int32_t *extentBTreeHint, + u_int32_t *endingFABNPlusOne ) +{ + OSErr err; + u_int32_t filePositionBlock; + int64_t temp64; + Boolean noMoreExtents; + int lockflags; + + temp64 = filePosition / (int64_t)vcb->blockSize; + filePositionBlock = (u_int32_t)temp64; + + bcopy ( fcb->fcbExtents, foundExtentData, sizeof(HFSPlusExtentRecord)); + + // Search the resident FCB first. + err = SearchExtentRecord( vcb, filePositionBlock, foundExtentData, 0, + foundExtentIndex, endingFABNPlusOne, &noMoreExtents ); + + if( err == noErr ) { + // Found the extent. Set results accordingly + *extentBTreeHint = kNoHint; // no hint, because not in the BTree + foundExtentKey->keyLength = 0; // 0 = the FCB itself + + goto Exit; + } + + // Didn't find extent in FCB. If FCB's extent record wasn't full, there's no point + // in searching the extents file. Note that SearchExtentRecord left us pointing at + // the last valid extent (or the first one, if none were valid). This means we need + // to fill in the hint and key outputs, just like the "if" statement above. + if ( noMoreExtents ) { + *extentBTreeHint = kNoHint; // no hint, because not in the BTree + foundExtentKey->keyLength = 0; // 0 = the FCB itself + err = fxRangeErr; // There are no more extents, so must be beyond PEOF + goto Exit; + } + + // + // Find the desired record, or the previous record if it is the same fork + // + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + err = FindExtentRecord(vcb, FORK_IS_RSRC(fcb) ? kResourceForkType : kDataForkType, + FTOC(fcb)->c_fileid, filePositionBlock, true, foundExtentKey, foundExtentData, extentBTreeHint); + hfs_systemfile_unlock(vcb, lockflags); + + if (err == btNotFound) { + // + // If we get here, the desired position is beyond the extents in the FCB, and there are no extents + // in the extents file. Return the FCB's extents and a range error. + // + *extentBTreeHint = kNoHint; + foundExtentKey->keyLength = 0; + err = GetFCBExtentRecord(fcb, foundExtentData); + // Note: foundExtentIndex and endingFABNPlusOne have already been set as a result of the very + // first SearchExtentRecord call in this function (when searching in the FCB's extents, and + // we got a range error). + + return fxRangeErr; + } + + // + // If we get here, there was either a BTree error, or we found an appropriate record. + // If we found a record, then search it for the correct index into the extents. 
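+
+/*
+ * Minimal sketch of the FABN walk that SearchExtentRecord performs for
+ * SearchExtentFile above: keep a running total of blockCounts until the
+ * wanted file allocation block falls inside an extent.  The struct and the
+ * helper name are simplified stand-ins for the HFS+ types.
+ */
+#include <stdint.h>
+
+struct sdesc { uint32_t startBlock; uint32_t blockCount; };
+
+/* Returns the extent index, or -1 if searchFABN lies beyond this record. */
+static int find_fabn(const struct sdesc rec[], int density,
+		     uint32_t recStartFABN, uint32_t searchFABN,
+		     uint32_t *endingFABNPlusOne)
+{
+	uint32_t end = recStartFABN;
+	int i;
+
+	for (i = 0; i < density && rec[i].blockCount != 0; ++i) {
+		end += rec[i].blockCount;
+		if (searchFABN < end) {		/* the wanted block lives in extent i */
+			*endingFABNPlusOne = end;
+			return i;
+		}
+	}
+	*endingFABNPlusOne = end;		/* caller must look in the next record */
+	return -1;
+}
+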
+ // + if (err == noErr) { + // Find appropriate index into extent record + err = SearchExtentRecord(vcb, filePositionBlock, foundExtentData, foundExtentKey->startBlock, + foundExtentIndex, endingFABNPlusOne, &noMoreExtents); + } + +Exit: + return err; +} + + + +//============================================================================ +// Routine: UpdateExtentRecord +// +// Function: Write new extent data to an existing extent record with a given key. +// If all of the extents are empty, and the extent record is in the +// extents file, then the record is deleted. +// +// Input: vcb - the volume containing the extents +// fcb - the file that owns the extents +// deleted - whether or not the file is already deleted +// extentFileKey - pointer to extent key record (xkr) +// If the key length is 0, then the extents are actually part +// of the catalog record, stored in the FCB. +// extentData - pointer to extent data record (xdr) +// extentBTreeHint - hint for given key, or kNoHint +// +// Result: noErr = ok +// (other) = error from BTree +//============================================================================ + +static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB *fcb, int deleted, + const HFSPlusExtentKey *extentFileKey, + const HFSPlusExtentRecord extentData, + u_int32_t extentBTreeHint) +{ + OSErr err = noErr; + + if (extentFileKey->keyLength == 0) { // keyLength == 0 means the FCB's extent record + BlockMoveData(extentData, fcb->fcbExtents, sizeof(HFSPlusExtentRecord)); + if (!deleted) { + FTOC(fcb)->c_flag |= C_MODIFIED; + } + } + else { + struct BTreeIterator *btIterator = NULL; + FSBufferDescriptor btRecord; + u_int16_t btRecordSize; + FCB * btFCB; + int lockflags; + + // + // Need to find and change a record in Extents BTree + // + btFCB = GetFileControlBlock(vcb->extentsRefNum); + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* + * The lock taken by callers of ExtendFileC/TruncateFileC is + * speculative and only occurs when the file already has + * overflow extents. So we need to make sure we have the lock + * here. The extents btree lock can be nested (its recursive) + * so we always take it here. 
+ */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + /* HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { // HFS Plus volume + HFSPlusExtentRecord foundData; // The extent data actually found + + BlockMoveData(extentFileKey, &btIterator->key, sizeof(HFSPlusExtentKey)); + + btIterator->hint.index = 0; + btIterator->hint.nodeNum = extentBTreeHint; + + btRecord.bufferAddress = &foundData; + btRecord.itemSize = sizeof(HFSPlusExtentRecord); + btRecord.itemCount = 1; + + err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); + + if (err == noErr) { + BlockMoveData(extentData, &foundData, sizeof(HFSPlusExtentRecord)); + err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); + } + (void) BTFlushPath(btFCB); + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ + HFSExtentKey * key; // Actual extent key used on disk in HFS + HFSExtentRecord foundData; // The extent data actually found + + key = (HFSExtentKey*) &btIterator->key; + key->keyLength = kHFSExtentKeyMaximumLength; + key->forkType = extentFileKey->forkType; + key->fileID = extentFileKey->fileID; + key->startBlock = extentFileKey->startBlock; + + btIterator->hint.index = 0; + btIterator->hint.nodeNum = extentBTreeHint; + + btRecord.bufferAddress = &foundData; + btRecord.itemSize = sizeof(HFSExtentRecord); + btRecord.itemCount = 1; + + err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); + + if (err == noErr) + err = HFSPlusToHFSExtents(extentData, (HFSExtentDescriptor *)&foundData); + + if (err == noErr) + err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); + (void) BTFlushPath(btFCB); + + } +#endif + + hfs_systemfile_unlock(vcb, lockflags); + + hfs_free(btIterator, sizeof(*btIterator)); + } + + return err; +} + + + +#if CONFIG_HFS_STD +static OSErr HFSPlusToHFSExtents( + const HFSPlusExtentRecord oldExtents, + HFSExtentRecord newExtents) +{ + OSErr err; + + err = noErr; + + // copy the first 3 extents + newExtents[0].startBlock = oldExtents[0].startBlock; + newExtents[0].blockCount = oldExtents[0].blockCount; + newExtents[1].startBlock = oldExtents[1].startBlock; + newExtents[1].blockCount = oldExtents[1].blockCount; + newExtents[2].startBlock = oldExtents[2].startBlock; + newExtents[2].blockCount = oldExtents[2].blockCount; + + #if DEBUG + if (oldExtents[3].startBlock || oldExtents[3].blockCount) { + DebugStr("ExtentRecord with > 3 extents is invalid for HFS"); + err = fsDSIntErr; + } + #endif + + return err; +} +#endif + + + +static OSErr GetFCBExtentRecord( + const FCB *fcb, + HFSPlusExtentRecord extents) +{ + + BlockMoveData(fcb->fcbExtents, extents, sizeof(HFSPlusExtentRecord)); + + return noErr; +} + + +//_________________________________________________________________________________ +// +// Routine: ExtentsAreIntegral +// +// Purpose: Ensure that each extent can hold an integral number of nodes +// Called by the NodesAreContiguous function +//_________________________________________________________________________________ + +static Boolean ExtentsAreIntegral( + const HFSPlusExtentRecord extentRecord, + u_int32_t mask, + u_int32_t *blocksChecked, + Boolean *checkedLastExtent) +{ + u_int32_t blocks; + u_int32_t extentIndex; + + *blocksChecked = 0; + *checkedLastExtent = false; + + for(extentIndex = 0; extentIndex < kHFSPlusExtentDensity; extentIndex++) + { + blocks = extentRecord[extentIndex].blockCount; + + if ( blocks == 0 ) + { + *checkedLastExtent = true; + break; + } + + *blocksChecked += blocks; + + if (blocks & mask) 
+ return false; + } + + return true; +} + + +//_________________________________________________________________________________ +// +// Routine: NodesAreContiguous +// +// Purpose: Ensure that all b-tree nodes are contiguous on disk +// Called by BTOpenPath during volume mount +//_________________________________________________________________________________ + +Boolean NodesAreContiguous( + ExtendedVCB *vcb, + FCB *fcb, + u_int32_t nodeSize) +{ + u_int32_t mask; + u_int32_t startBlock; + u_int32_t blocksChecked; + u_int32_t hint; + HFSPlusExtentKey key; + HFSPlusExtentRecord extents; + OSErr result; + Boolean lastExtentReached; + int lockflags; + + + if (vcb->blockSize >= nodeSize) + return TRUE; + + mask = (nodeSize / vcb->blockSize) - 1; + + // check the local extents + (void) GetFCBExtentRecord(fcb, extents); + if ( !ExtentsAreIntegral(extents, mask, &blocksChecked, &lastExtentReached) ) + return FALSE; + + if ( lastExtentReached || + (int64_t)((int64_t)blocksChecked * (int64_t)vcb->blockSize) >= (int64_t)fcb->ff_size) + return TRUE; + + startBlock = blocksChecked; + + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + // check the overflow extents (if any) + while ( !lastExtentReached ) + { + result = FindExtentRecord(vcb, kDataForkType, fcb->ff_cp->c_fileid, startBlock, FALSE, &key, extents, &hint); + if (result) break; + + if ( !ExtentsAreIntegral(extents, mask, &blocksChecked, &lastExtentReached) ) { + hfs_systemfile_unlock(vcb, lockflags); + return FALSE; + } + startBlock += blocksChecked; + } + hfs_systemfile_unlock(vcb, lockflags); + return TRUE; +} + diff --git a/core/FileIDsServices.c b/core/FileIDsServices.c new file mode 100644 index 0000000..aba8940 --- /dev/null +++ b/core/FileIDsServices.c @@ -0,0 +1,794 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include "hfs_macos_defs.h" +#include "hfs_format.h" + +#include "FileMgrInternal.h" +#include "HFSUnicodeWrappers.h" +#include "CatalogPrivate.h" +#include +#include +#include + +#include "hfs_dbg.h" + +struct ExtentsRecBuffer { + ExtentKey extentKey; + ExtentRecord extentData; +}; +typedef struct ExtentsRecBuffer ExtentsRecBuffer; + + +static u_int32_t CheckExtents( void *extents, u_int32_t blocks, Boolean isHFSPlus ); +static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileNumber, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ); +static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ); + +#if CONFIG_HFS_STD +static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); +#endif + +static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); +static void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, u_int16_t bufferCount ); + +/* + * This function moves the overflow extents associated with srcID into the file associated with dstID. + * We should have already verified that 'srcID' has overflow extents. So now we move all of the overflow + * extent records. + */ +OSErr MoveData( ExtendedVCB *vcb, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, int rsrc) { + + OSErr err; + + /* + * Only the source file should have extents, so we just track those. + * We operate on the fork represented by the open FD that was used to call into this + * function + */ + if (rsrc) { + /* Copy the extent overflow blocks. */ + err = MoveExtents( vcb, srcID, destID, 1, (u_int8_t)0xff, 1); + if ( err != noErr ) { + if ( err != dskFulErr ) { + return( err ); + } + /* + * In case of error, we would have probably run into problems + * growing the extents b-tree. Since the move is actually a copy + delete + * just delete the new entries. Same for below. + */ + err = DeleteExtents( vcb, destID, 1, (u_int8_t)0xff, 1); + ReturnIfError( err ); // we are doomed. Just QUIT! + goto FlushAndReturn; + } + } + else { + /* Copy the extent overflow blocks. */ + err = MoveExtents( vcb, srcID, destID, 1, 0, 1); + if ( err != noErr ) { + if ( err != dskFulErr ) { + return( err ); + } + err = DeleteExtents( vcb, destID, 1, 0, 1); + ReturnIfError( err ); // we are doomed. Just QUIT! 
+ goto FlushAndReturn; + } + } + +FlushAndReturn: + /* Write out the catalog and extent overflow B-Tree changes */ + err = FlushCatalog( vcb ); + err = FlushExtentFile( vcb ); + + return( err ); +} + + +OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param destName, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, u_int32_t srcHint, u_int32_t destHint ) +{ + CatalogKey srcKey; // 518 bytes + CatalogKey destKey; // 518 bytes + CatalogRecord srcData; // 520 bytes + CatalogRecord destData; // 520 bytes + CatalogRecord swapData; // 520 bytes + int16_t numSrcExtentBlocks; + int16_t numDestExtentBlocks; + OSErr err; + Boolean isHFSPlus = ( vcb->vcbSigWord == kHFSPlusSigWord ); + + err = BuildCatalogKeyUTF8(vcb, srcID, srcName, kUndefinedStrLen, &srcKey); + ReturnIfError(err); + + err = BuildCatalogKeyUTF8(vcb, destID, destName, kUndefinedStrLen, &destKey); + ReturnIfError(err); + + if ( isHFSPlus ) + { + //-- Step 1: Check the catalog nodes for extents + + //-- locate the source file, test for extents in extent file, and copy the cat record for later + err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint ); + ReturnIfError( err ); + + if ( srcData.recordType != kHFSPlusFileRecord ) + return( cmFThdDirErr ); // Error "cmFThdDirErr = it is a directory" + + //-- Check if there are any extents in the source file + //€€ I am only checling the extents in the low 32 bits, routine will fail if files extents after 2 gig are in overflow + numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.dataFork.extents, srcData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus ); + if ( numSrcExtentBlocks == 0 ) // then check the resource fork extents + numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.resourceFork.extents, srcData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus ); + + //-- Check if there are any extents in the destination file + err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint ); + ReturnIfError( err ); + + if ( destData.recordType != kHFSPlusFileRecord ) + return( cmFThdDirErr ); // Error "cmFThdDirErr = it is a directory" + + numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.dataFork.extents, destData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus ); + if ( numDestExtentBlocks == 0 ) // then check the resource fork extents + numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.resourceFork.extents, destData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus ); + + //-- Step 2: Exchange the Extent key in the extent file + + //-- Exchange the extents key in the extent file + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); + + if ( numSrcExtentBlocks && numDestExtentBlocks ) // if both files have extents + { + //-- Change the source extents file ids to our known bogus value + err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, 0,0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) { + return( err ); + } + else { + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! 
+ + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( dskFulErr ); + } + } + + //-- Change the destination extents file id's to the source id's + err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + +ExUndo2aPlus: err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); // Move the extents back + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( dskFulErr ); + + } + + //-- Change the bogus extents file id's to the dest id's + err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); // Move the extents back + ReturnIfError( err ); // we are doomed. Just QUIT! + + goto ExUndo2aPlus; + } + + } + else if ( numSrcExtentBlocks ) // just the source file has extents + { + err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + goto FlushAndReturn; + } + } + else if ( numDestExtentBlocks ) // just the destination file has extents + { + err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! 
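+
+/*
+ * Sketch of the three-step swap used above.  Extent keys embed the owning
+ * fileID, so the two files' overflow records cannot simply be renamed into
+ * each other in place; a temporary "bogus" ID breaks the cycle.  This model
+ * relabels owner tags in a plain array instead of B-tree records, and the
+ * BOGUS_ID value is a stand-in, not the real kHFSBogusExtentFileID.
+ */
+#include <stdint.h>
+
+#define BOGUS_ID	0xFFFFFFFDu
+
+static void relabel(uint32_t owner[], int n, uint32_t from, uint32_t to)
+{
+	int i;
+	for (i = 0; i < n; ++i)
+		if (owner[i] == from)
+			owner[i] = to;
+}
+
+static void swap_extent_owners(uint32_t owner[], int n,
+			       uint32_t srcID, uint32_t destID)
+{
+	relabel(owner, n, srcID,  BOGUS_ID);	/* step 1: park the source records */
+	relabel(owner, n, destID, srcID);	/* step 2: dest records now belong to src */
+	relabel(owner, n, BOGUS_ID, destID);	/* step 3: parked records belong to dest */
+}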
+ + goto FlushAndReturn; + } + } + + //-- Step 3: Change the data in the catalog nodes + + //-- find the source cnode and put dest info in it + err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint ); + if ( err != noErr ) + return( cmBadNews ); + + BlockMoveData( &srcData, &swapData, sizeof(CatalogRecord) ); + CopyBigCatalogNodeInfo( &destData, &srcData ); + + err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSPlusCatalogFile), &srcHint ); + ReturnIfError( err ); + + // find the destination cnode and put source info in it + err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint ); + if ( err != noErr ) + return( cmBadNews ); + + CopyBigCatalogNodeInfo( &swapData, &destData ); + err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSPlusCatalogFile), &destHint ); + ReturnIfError( err ); + } +#if CONFIG_HFS_STD + else // HFS // + { + //-- Step 1: Check the catalog nodes for extents + + //-- locate the source file, test for extents in extent file, and copy the cat record for later + err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint ); + ReturnIfError( err ); + + if ( srcData.recordType != kHFSFileRecord ) + return( cmFThdDirErr ); // Error "cmFThdDirErr = it is a directory" + + //-- Check if there are any extents in the source file + numSrcExtentBlocks = CheckExtents( srcData.hfsFile.dataExtents, srcData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus ); + if ( numSrcExtentBlocks == 0 ) // then check the resource fork extents + numSrcExtentBlocks = CheckExtents( srcData.hfsFile.rsrcExtents, srcData.hfsFile.rsrcPhysicalSize / vcb->blockSize, isHFSPlus ); + + + //€€ Do we save the found source node for later use? + + + //-- Check if there are any extents in the destination file + err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint ); + ReturnIfError( err ); + + if ( destData.recordType != kHFSFileRecord ) + return( cmFThdDirErr ); // Error "cmFThdDirErr = it is a directory" + + numDestExtentBlocks = CheckExtents( destData.hfsFile.dataExtents, destData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus ); + if ( numDestExtentBlocks == 0 ) // then check the resource fork extents + numDestExtentBlocks = CheckExtents( destData.hfsFile.rsrcExtents, destData.hfsFile.rsrcPhysicalSize / vcb->blockSize, isHFSPlus ); + + //€€ Do we save the found destination node for later use? + + + //-- Step 2: Exchange the Extent key in the extent file + + //-- Exchange the extents key in the extent file + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); + + if ( numSrcExtentBlocks && numDestExtentBlocks ) // if both files have extents + { + //-- Change the source extents file ids to our known bogus value + err = MoveExtents( vcb, srcData.hfsFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + +ExUndo1a: err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! 
+ + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( dskFulErr ); + } + + //-- Change the destination extents file id's to the source id's + err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + +ExUndo2a: err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); // Move the extents back + ReturnIfError( err ); // we are doomed. Just QUIT! + + goto ExUndo1a; + } + + //-- Change the bogus extents file id's to the dest id's + err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, 0, 0, isHFSPlus ); // Move the extents back + ReturnIfError( err ); // we are doomed. Just QUIT! + + goto ExUndo2a; + } + + } + else if ( numSrcExtentBlocks ) // just the source file has extents + { + err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + goto FlushAndReturn; + } + } + else if ( numDestExtentBlocks ) // just the destination file has extents + { + err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); + if ( err != noErr ) + { + if ( err != dskFulErr ) + return( err ); + + err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! 
+ + goto FlushAndReturn; + } + } + + //-- Step 3: Change the data in the catalog nodes + + //-- find the source cnode and put dest info in it + err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint ); + if ( err != noErr ) + return( cmBadNews ); + + BlockMoveData( &srcData, &swapData, sizeof(CatalogRecord) ); + //€€ Asm source copies from the saved dest catalog node + CopyCatalogNodeInfo( &destData, &srcData ); + + err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSCatalogFile), &srcHint ); + ReturnIfError( err ); + + + // find the destination cnode and put source info in it + err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint ); + if ( err != noErr ) + return( cmBadNews ); + + CopyCatalogNodeInfo( &swapData, &destData ); + err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSCatalogFile), &destHint ); + ReturnIfError( err ); + } +#endif + + err = noErr; + + //-- Step 4: Error Handling section + + +FlushAndReturn: + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( err ); +} + + +#if CONFIG_HFS_STD +static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) +{ + dest->hfsFile.dataLogicalSize = src->hfsFile.dataLogicalSize; + dest->hfsFile.dataPhysicalSize = src->hfsFile.dataPhysicalSize; + dest->hfsFile.rsrcLogicalSize = src->hfsFile.rsrcLogicalSize; + dest->hfsFile.rsrcPhysicalSize = src->hfsFile.rsrcPhysicalSize; + dest->hfsFile.modifyDate = src->hfsFile.modifyDate; + BlockMoveData( src->hfsFile.dataExtents, dest->hfsFile.dataExtents, sizeof(HFSExtentRecord) ); + BlockMoveData( src->hfsFile.rsrcExtents, dest->hfsFile.rsrcExtents, sizeof(HFSExtentRecord) ); +} +#endif + +static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) +{ + BlockMoveData( &src->hfsPlusFile.dataFork, &dest->hfsPlusFile.dataFork, sizeof(HFSPlusForkData) ); + BlockMoveData( &src->hfsPlusFile.resourceFork, &dest->hfsPlusFile.resourceFork, sizeof(HFSPlusForkData) ); + dest->hfsPlusFile.contentModDate = src->hfsPlusFile.contentModDate; +} + + +static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ) +{ + FCB * fcb; + ExtentsRecBuffer extentsBuffer[kNumExtentsToCache]; + ExtentKey * extentKeyPtr; + ExtentRecord extentData; + struct BTreeIterator *btIterator = NULL; + struct BTreeIterator *tmpIterator = NULL; + FSBufferDescriptor btRecord; + u_int16_t btKeySize; + u_int16_t btRecordSize; + int16_t i, j; + OSErr err; + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + tmpIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + fcb = GetFileControlBlock(vcb->extentsRefNum); + + (void) BTInvalidateHint(btIterator); + extentKeyPtr = (ExtentKey*) &btIterator->key; + btRecord.bufferAddress = &extentData; + btRecord.itemCount = 1; + + //-- Collect the extent records + + // + // A search on the following key will cause the BTree to be positioned immediately + // before the first extent record for file #srcFileID, but not actually positioned + // on any record. This is because there cannot be an extent record with FABN = 0 + // (the first extent of the fork, which would be in the catalog entry, not an extent + // record). + // + // Using BTIterateRecord with kBTreeNextRecord will then get that first extent record. 
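+
+/*
+ * Model of the positioning trick described above: searching for
+ * (fileID, startBlock == 0) can never match a real overflow record, so the
+ * iterator lands just before the file's first record and kBTreeNextRecord
+ * then walks them in order.  A sorted array stands in for the B-tree, and the
+ * key only carries fileID/startBlock; forkType is omitted for brevity.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+struct okey { uint32_t fileID; uint32_t startBlock; };
+
+static size_t position_before(const struct okey keys[], size_t n, uint32_t fileID)
+{
+	size_t i = 0;
+	/* first index whose key is >= (fileID, 0); nothing equals it exactly */
+	while (i < n && keys[i].fileID < fileID)
+		++i;
+	return i;
+}
+
+static size_t count_records_for(const struct okey keys[], size_t n, uint32_t fileID)
+{
+	size_t i = position_before(keys, n, fileID);
+	size_t cnt = 0;
+	while (i < n && keys[i].fileID == fileID) {	/* stop at the first foreign key */
+		++cnt;
+		++i;
+	}
+	return cnt;
+}
+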
+ // + if (isHFSPlus) { + btRecord.itemSize = sizeof(HFSPlusExtentRecord); + btKeySize = sizeof(HFSPlusExtentKey); + + extentKeyPtr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength; + extentKeyPtr->hfsPlus.forkType = forkType; + extentKeyPtr->hfsPlus.pad = 0; + extentKeyPtr->hfsPlus.fileID = srcFileID; + extentKeyPtr->hfsPlus.startBlock = 0; + } +#if CONFIG_HFS_STD + else { + btRecord.itemSize = sizeof(HFSExtentRecord); + btKeySize = sizeof(HFSExtentKey); + + extentKeyPtr->hfs.keyLength = kHFSExtentKeyMaximumLength; + extentKeyPtr->hfs.forkType = 0; + extentKeyPtr->hfs.fileID = srcFileID; + extentKeyPtr->hfs.startBlock = 0; + } +#else + else { + hfs_free(tmpIterator, sizeof(*tmpIterator)); + hfs_free(btIterator, sizeof(*btIterator)); + return cmBadNews; + } +#endif + + // + // We do an initial BTSearchRecord to position the BTree's iterator just before any extent + // records for srcFileID. We then do a few BTIterateRecord and BTInsertRecord of those found + // records, but with destFileID as the file number in the key. Keep doing this sequence of + // BTIterateRecord and BTInsertRecord until we find an extent for another file, or there are + // no more extent records in the tree. + // + // Basically, we're copying records kNumExtentsToCache at a time. The copies have their file ID + // set to destFileID. + // + // This depends on BTInsertRecord not effecting the iterator used by BTIterateRecord. If it + // _did_ effect the iterator, then we would need to do a BTSearchRecord before each series + // of BTIterateRecord. We'd need to set up the key for BTSearchRecord to find the last record + // we found, so that BTIterateRecord would get the next one (the first we haven't processed). + // + + err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + + // We expect a btNotFound here, since there shouldn't be an extent record with FABN = 0. + if (err != btNotFound) + { + hfs_debug("hfs: unexpected error from SearchBTreeRecord\n"); + + if (err == noErr) // If we found such a bogus extent record, then the tree is really messed up + err = cmBadNews; // so return an error that conveys the disk is hosed. + + hfs_free(tmpIterator, sizeof(*tmpIterator)); + hfs_free(btIterator, sizeof(*btIterator)); + return err; + } + + do + { + btRecord.bufferAddress = &extentData; + btRecord.itemCount = 1; + + for ( i=0 ; ihfsPlus.fileID; + } +#if CONFIG_HFS_STD + else { + foundFileID = extentKeyPtr->hfs.fileID; + } +#endif + if ( foundFileID == srcFileID ) { + /* Check if we need to quit early. */ + if (quitEarly && isHFSPlus) { + if (extentKeyPtr->hfsPlus.forkType != forkType) { + break; + } + } + CopyExtentInfo(extentKeyPtr, &extentData, extentsBuffer, i); + } + else{ + /* The fileID's are of a different file. We're done here. 
*/ + break; + } + } + + + + //-- edit each extent key, and reinsert each extent record in the extent file + if (isHFSPlus) + btRecordSize = sizeof(HFSPlusExtentRecord); +#if CONFIG_HFS_STD + else + btRecordSize = sizeof(HFSExtentRecord); +#endif + + for ( j=0 ; jkey, btKeySize); + btRecord.bufferAddress = &(extentsBuffer[j].extentData); + + err = BTInsertRecord(fcb, tmpIterator, &btRecord, btRecordSize); + if ( err != noErr ) { + /* Parse the error and free iterators */ + hfs_free(btIterator, sizeof(*btIterator)); + hfs_free(tmpIterator, sizeof(*tmpIterator)); + if ( err == btExists ) + { + hfs_debug("hfs: can't insert record -- already exists\n"); + return( cmBadNews ); + } + else { + return( err ); + } + } + } + + //-- okay, done with this buffered batch, go get the next set of extent records + // If our buffer is not full, we must be done, or recieved an error + + if ( i != kNumExtentsToCache ) // if the buffer is not full, we must be done + { + err = DeleteExtents( vcb, srcFileID, quitEarly, forkType, isHFSPlus ); // Now delete all the extent entries with the sourceID + if (err != noErr ) + hfs_debug("hfs: error from DeleteExtents (%d)\n", err); + break; // we're done! + } + } while ( true ); + + hfs_free(tmpIterator, sizeof(*tmpIterator)); + hfs_free(btIterator, sizeof(*btIterator)); + + return( err ); +} + + +static void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, u_int16_t bufferCount ) +{ + BlockMoveData( key, &(buffer[bufferCount].extentKey), sizeof( ExtentKey ) ); + BlockMoveData( data, &(buffer[bufferCount].extentData), sizeof( ExtentRecord ) ); +} + + +//-- Delete all extents in extent file that have the ID given. +static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ) +{ + FCB * fcb; + ExtentKey * extentKeyPtr; + ExtentRecord extentData; + struct BTreeIterator *btIterator = NULL; + struct BTreeIterator *tmpIterator = NULL; + FSBufferDescriptor btRecord; + u_int16_t btRecordSize; + OSErr err; + + btIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + tmpIterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + fcb = GetFileControlBlock(vcb->extentsRefNum); + + (void) BTInvalidateHint(btIterator); + extentKeyPtr = (ExtentKey*) &btIterator->key; + btRecord.bufferAddress = &extentData; + btRecord.itemCount = 1; + + // The algorithm is to position the BTree just before any extent records for fileID. + // Then just keep getting successive records. If the record is still for fileID, + // then delete it. + + if (isHFSPlus) { + btRecord.itemSize = sizeof(HFSPlusExtentRecord); + + extentKeyPtr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength; + extentKeyPtr->hfsPlus.forkType = forkType; + extentKeyPtr->hfsPlus.pad = 0; + extentKeyPtr->hfsPlus.fileID = fileID; + extentKeyPtr->hfsPlus.startBlock = 0; + } +#if CONFIG_HFS_STD + else { + btRecord.itemSize = sizeof(HFSExtentRecord); + + extentKeyPtr->hfs.keyLength = kHFSExtentKeyMaximumLength; + extentKeyPtr->hfs.forkType = forkType; + extentKeyPtr->hfs.fileID = fileID; + extentKeyPtr->hfs.startBlock = 0; + } +#else + else { + err = cmBadNews; + goto exit; + } +#endif + + err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + if ( err != btNotFound ) + { + if (err == noErr) { // Did we find a bogus extent record? + err = cmBadNews; // Yes, so indicate things are messed up. 
+ } + + goto exit; + } + + do + { + HFSCatalogNodeID foundFileID = 0; + + err = BTIterateRecord(fcb, kBTreeNextRecord, btIterator, &btRecord, &btRecordSize); + if ( err != noErr ) + { + if (err == btNotFound) // If we hit the end of the BTree + err = noErr; // then it's OK + + break; // We're done now. + } + if (isHFSPlus) { + foundFileID = extentKeyPtr->hfsPlus.fileID; + } +#if CONFIG_HFS_STD + else { + foundFileID = extentKeyPtr->hfs.fileID; + } +#endif + + if ( foundFileID != fileID ) { + break; // numbers don't match, we must be done + } + if (quitEarly && isHFSPlus) { + /* If we're only deleting one type of fork, then quit early if it doesn't match */ + if (extentKeyPtr->hfsPlus.forkType != forkType) { + break; + } + } + + *tmpIterator = *btIterator; + err = BTDeleteRecord( fcb, tmpIterator ); + if (err != noErr) + break; + } while ( true ); + +exit: + + hfs_free(tmpIterator, sizeof(*tmpIterator)); + hfs_free(btIterator, sizeof(*btIterator)); + + return( err ); +} + + +// Check if there are extents represented in the extents overflow file. +static u_int32_t CheckExtents( void *extents, u_int32_t totalBlocks, Boolean isHFSPlus ) +{ + u_int32_t extentAllocationBlocks; + u_int16_t i; + + + if ( totalBlocks == 0 ) + return( 0 ); + + extentAllocationBlocks = 0; + + if ( isHFSPlus ) + { + for ( i = 0 ; i < kHFSPlusExtentDensity ; i++ ) + { + extentAllocationBlocks += ((HFSPlusExtentDescriptor *)extents)[i].blockCount; + if ( extentAllocationBlocks >= totalBlocks ) // greater than or equal (extents can add past eof if 'Close" crashes w/o truncating new clump) + return( 0 ); + } + } +#if CONFIG_HFS_STD + else + { + for ( i = 0 ; i < kHFSExtentDensity ; i++ ) + { + extentAllocationBlocks += ((HFSExtentDescriptor *)extents)[i].blockCount; + if ( extentAllocationBlocks >= totalBlocks ) // greater than or equal (extents can add past eof if 'Close" crashes w/o truncating new clump) + return( 0 ); + } + } +#endif + + return( extentAllocationBlocks ); +} diff --git a/core/FileMgrInternal.h b/core/FileMgrInternal.h new file mode 100644 index 0000000..b54daf3 --- /dev/null +++ b/core/FileMgrInternal.h @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: FilesInternal.h + + Contains: IPI for File Manager (HFS Plus) + + Version: HFS Plus 1.0 + + Copyright: (c) 1996-2001 by Apple Inc., all rights reserved. + +*/ +#ifndef __FILEMGRINTERNAL__ +#define __FILEMGRINTERNAL__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include +#include + +#if !HFS_ALLOC_TEST + +#include "hfs.h" +#include "hfs_macos_defs.h" +#include "hfs_format.h" +#include "hfs_cnode.h" + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* CatalogNodeID is used to track catalog objects */ +typedef u_int32_t HFSCatalogNodeID; + +/* internal error codes*/ + +#if TARGET_API_MACOS_X + #define ERR_BASE -32767 +#else + #define ERR_BASE 0 +#endif + +enum { + /* FXM errors*/ + fxRangeErr = ERR_BASE + 16, /* file position beyond mapped range*/ + fxOvFlErr = ERR_BASE + 17, /* extents file overflow*/ + /* Unicode errors*/ + uniTooLongErr = ERR_BASE + 24, /* Unicode string too long to convert to Str31*/ + uniBufferTooSmallErr = ERR_BASE + 25, /* Unicode output buffer too small*/ + uniNotMappableErr = ERR_BASE + 26, /* Unicode string can't be mapped to given script*/ + /* BTree Manager errors*/ + btNotFound = ERR_BASE + 32, /* record not found*/ + btExists = ERR_BASE + 33, /* record already exists*/ + btNoSpaceAvail = ERR_BASE + 34, /* no available space*/ + btNoFit = ERR_BASE + 35, /* record doesn't fit in node */ + btBadNode = ERR_BASE + 36, /* bad node detected*/ + btBadHdr = ERR_BASE + 37, /* bad BTree header record detected*/ + dsBadRotate = ERR_BASE + 64, /* bad BTree rotate*/ + /* Catalog Manager errors*/ + cmNotFound = ERR_BASE + 48, /* CNode not found*/ + cmExists = ERR_BASE + 49, /* CNode already exists*/ + cmNotEmpty = ERR_BASE + 50, /* directory CNode not empty (valence = 0)*/ + cmRootCN = ERR_BASE + 51, /* invalid reference to root CNode*/ + cmBadNews = ERR_BASE + 52, /* detected bad catalog structure*/ + cmFThdDirErr = ERR_BASE + 53, /* thread belongs to a directory not a file*/ + cmFThdGone = ERR_BASE + 54, /* file thread doesn't exist*/ + cmParentNotFound = ERR_BASE + 55, /* CNode for parent ID does not exist*/ + /* TFS internal errors*/ + fsDSIntErr = -127 /* Internal file system error*/ +}; + + +/* internal flags*/ + +enum { + kEFAllMask = 0x01, /* allocate all requested bytes or none */ + kEFContigMask = 0x02, /* force contiguous allocation */ + kEFReserveMask = 0x04, /* keep block reserve */ + kEFDeferMask = 0x08, /* defer file block allocations */ + kEFNoClumpMask = 0x10, /* don't round up to clump size */ + kEFMetadataMask = 0x20, /* metadata allocation */ + + kTFTrunExtBit = 0, /* truncate to the extent containing new PEOF*/ + kTFTrunExtMask = 1 +}; + +enum { + kUndefinedStrLen = 0, /* Unknown string length */ + kNoHint = 0, + + /* FileIDs variables*/ + kNumExtentsToCache = 4 /* just guessing for ExchangeFiles*/ +}; + + +/* Universal Extent Key */ + +union ExtentKey { + HFSExtentKey hfs; + HFSPlusExtentKey hfsPlus; +}; +typedef union ExtentKey ExtentKey; +/* Universal extent descriptor */ + +union ExtentDescriptor { + HFSExtentDescriptor hfs; + HFSPlusExtentDescriptor hfsPlus; +}; +typedef union ExtentDescriptor ExtentDescriptor; +/* Universal extent record */ + +union ExtentRecord { + HFSExtentRecord hfs; + HFSPlusExtentRecord hfsPlus; +}; +typedef union ExtentRecord ExtentRecord; + + +enum { + CMMaxCName = kHFSMaxFileNameChars +}; + + + +/* Universal catalog name*/ + +union CatalogName { + Str31 pstr; + HFSUniStr255 ustr; +}; +typedef union CatalogName CatalogName; + + +/* + * MacOS 
accessor routines + */ +#define GetFileControlBlock(fref) VTOF((fref)) +#define GetFileRefNumFromFCB(fcb) FTOV((fcb)) + +/* Test for error and return if error occurred*/ +EXTERN_API_C( void ) +ReturnIfError (OSErr result); + +#define ReturnIfError(result) do { if ( (result) != noErr ) return (result); } while(0) + +/* Exit function on error*/ +EXTERN_API_C( void ) +ExitOnError (OSErr result); + +#define ExitOnError( result ) do { if ( ( result ) != noErr ) goto ErrorExit; } while(0) + + + +/* Catalog Manager Routines (IPI)*/ + +EXTERN_API_C( OSErr ) +ExchangeFileIDs (ExtendedVCB * volume, + ConstUTF8Param srcName, + ConstUTF8Param destName, + HFSCatalogNodeID srcID, + HFSCatalogNodeID destID, + u_int32_t srcHint, + u_int32_t destHint ); + +EXTERN_API_C( OSErr ) +MoveData( ExtendedVCB *vcb, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, int rsrc); + +/* BTree Manager Routines*/ + +typedef CALLBACK_API_C( int32_t , KeyCompareProcPtr )(void *a, void *b); + + +EXTERN_API_C( OSErr ) +ReplaceBTreeRecord (FileReference refNum, + const void * key, + u_int32_t hint, + void * newData, + u_int16_t dataSize, + u_int32_t * newHint); + + +/* Prototypes for exported routines in VolumeAllocation.c*/ + +/* + * Flags for BlockAllocate(), BlockDeallocate() and hfs_block_alloc. + * Some of these are for internal use only. See the comment at the + * top of hfs_alloc_int for more details on the semantics of these + * flags. + */ +#define HFS_ALLOC_FORCECONTIG 0x001 //force contiguous block allocation; minblocks must be allocated +#define HFS_ALLOC_METAZONE 0x002 //can use metazone blocks +#define HFS_ALLOC_SKIPFREEBLKS 0x004 //skip checking/updating freeblocks during alloc/dealloc +#define HFS_ALLOC_FLUSHTXN 0x008 //pick best fit for allocation, even if a jnl flush is req'd +#define HFS_ALLOC_TENTATIVE 0x010 //reserved allocation that can be claimed back +#define HFS_ALLOC_LOCKED 0x020 //reserved allocation that can't be claimed back +#define HFS_ALLOC_IGNORE_TENTATIVE 0x040 //Steal tentative blocks if necessary +#define HFS_ALLOC_IGNORE_RESERVED 0x080 //Ignore tentative/committed blocks +#define HFS_ALLOC_USE_TENTATIVE 0x100 //Use the supplied tentative range (if possible) +#define HFS_ALLOC_COMMIT 0x200 //Commit the supplied extent to disk +#define HFS_ALLOC_TRY_HARD 0x400 //Search hard to try and get maxBlocks; implies HFS_ALLOC_FLUSHTXN +#define HFS_ALLOC_ROLL_BACK 0x800 //Reallocate blocks that were just deallocated +#define HFS_ALLOC_FAST_DEV 0x1000 //Prefer fast device for allocation + +typedef uint32_t hfs_block_alloc_flags_t; + +struct rl_entry; +EXTERN_API_C( OSErr ) +BlockAllocate (ExtendedVCB * vcb, + u_int32_t startingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t * startBlock, + u_int32_t * actualBlocks); + +typedef struct hfs_alloc_extra_args { + // Used with HFS_ALLOC_TRY_HARD and HFS_ALLOC_FORCECONTIG + uint32_t max_blocks; + + // Used with with HFS_ALLOC_USE_TENTATIVE & HFS_ALLOC_COMMIT + struct rl_entry **reservation_in; + + // Used with HFS_ALLOC_TENTATIVE & HFS_ALLOC_LOCKED + struct rl_entry **reservation_out; + + /* + * If the maximum cannot be returned, the allocation will be + * trimmed to the specified alignment after taking + * @alignment_offset into account. @alignment and + * @alignment_offset are both in terms of blocks, *not* bytes. + * The result will be such that: + * + * (block_count + @alignment_offset) % @alignment == 0 + * + * Alignment is *not* guaranteed. 
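+	 * For illustration (numbers are arbitrary): with @alignment == 8 and
+	 * @alignment_offset == 3, a returned count of 23 blocks would be
+	 * trimmed to 21, since (21 + 3) % 8 == 0 while (23 + 3) % 8 != 0.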
+ * + * One example where alignment might be useful is in the case + * where the page size is greater than the allocation block size + * and I/O is being performed in multiples of the page size. + */ + int alignment; + int alignment_offset; +} hfs_alloc_extra_args_t; + +/* + * Same as BlockAllocate but slightly different API. + * @extent.startBlock is a hint for where to start searching and + * @extent.blockCount is the minimum number of blocks acceptable. + * Additional arguments can be passed in @extra_args and use will + * depend on @flags. See comment at top of hfs_block_alloc_int for + * more information. + */ +errno_t hfs_block_alloc(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *extra_args); + +EXTERN_API_C( OSErr ) +BlockDeallocate (ExtendedVCB * vcb, + u_int32_t firstBlock, + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags); + +EXTERN_API_C ( void ) +ResetVCBFreeExtCache(struct hfsmount *hfsmp); + +EXTERN_API_C( OSErr ) +BlockMarkAllocated(ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks); + +EXTERN_API_C( OSErr ) +BlockMarkFree( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks); + +EXTERN_API_C( OSErr ) +BlockMarkFreeUnused( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks); + +EXTERN_API_C( u_int32_t ) +MetaZoneFreeBlocks(ExtendedVCB *vcb); + +EXTERN_API_C( u_int32_t ) +UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block); + +EXTERN_API_C( u_int32_t ) +ScanUnmapBlocks(struct hfsmount *hfsmp); + +EXTERN_API_C( int ) +hfs_init_summary (struct hfsmount *hfsmp); + +errno_t hfs_find_free_extents(struct hfsmount *hfsmp, + void (*callback)(void *data, off_t), void *callback_arg); + +void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation); +void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation); + +/* File Extent Mapping routines*/ +EXTERN_API_C( OSErr ) +FlushExtentFile (ExtendedVCB * vcb); + +#if CONFIG_HFS_STD +EXTERN_API_C( int32_t ) +CompareExtentKeys (const HFSExtentKey * searchKey, + const HFSExtentKey * trialKey); +#endif + +EXTERN_API_C( int32_t ) +CompareExtentKeysPlus (const HFSPlusExtentKey *searchKey, + const HFSPlusExtentKey *trialKey); + +OSErr SearchExtentFile(ExtendedVCB *vcb, + const FCB *fcb, + int64_t filePosition, + HFSPlusExtentKey *foundExtentKey, + HFSPlusExtentRecord foundExtentData, + u_int32_t *foundExtentDataIndex, + u_int32_t *extentBTreeHint, + u_int32_t *endingFABNPlusOne ); + +EXTERN_API_C( OSErr ) +TruncateFileC (ExtendedVCB *vcb, FCB *fcb, int64_t peof, int deleted, + int rsrc, uint32_t fileid, Boolean truncateToExtent); + +EXTERN_API_C( OSErr ) +ExtendFileC (ExtendedVCB * vcb, + FCB * fcb, + int64_t bytesToAdd, + u_int32_t blockHint, + u_int32_t flags, + int64_t * actualBytesAdded); + +EXTERN_API_C( OSErr ) +MapFileBlockC (ExtendedVCB * vcb, + FCB * fcb, + size_t numberOfBytes, + off_t offset, + daddr64_t * startBlock, + size_t * availableBytes); + +OSErr HeadTruncateFile(ExtendedVCB *vcb, FCB *fcb, u_int32_t headblks); + +EXTERN_API_C( int ) +AddFileExtent (ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount); + +#if TARGET_API_MACOS_X +EXTERN_API_C( Boolean ) +NodesAreContiguous (ExtendedVCB * vcb, + FCB * fcb, + u_int32_t nodeSize); +#endif + +/* Get the current time in UTC (GMT)*/ +EXTERN_API_C( u_int32_t ) +GetTimeUTC (void); + +EXTERN_API_C( u_int32_t ) +LocalToUTC (u_int32_t localTime); + +EXTERN_API_C( u_int32_t ) +UTCToLocal (u_int32_t utcTime); + + +#ifdef 
__cplusplus +} +#endif + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __FILEMGRINTERNAL__ */ + diff --git a/core/HFSUnicodeWrappers.h b/core/HFSUnicodeWrappers.h new file mode 100644 index 0000000..35a394b --- /dev/null +++ b/core/HFSUnicodeWrappers.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2000-2003, 2005-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: HFSUnicodeWrappers.h + + Contains: IPI to Unicode routines used by File Manager. + + Version: HFS Plus 1.0 + + Written by: Mark Day + + Copyright: (c) 1996-1997 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: xxx put dri here xxx + + Other Contact: xxx put other contact here xxx + + Technology: xxx put technology here xxx + + Writers: + + (DSH) Deric Horn + (msd) Mark Day + (djb) Don Brady + + Change History (most recent first): + + 11/16/97 djb Change Unicode.h to UnicodeConverter.h. + 11/7/97 msd Remove prototype for CompareUnicodeNames(). Add prototype for + FastUnicodeCompare(). + 10/13/97 djb Add encoding/index macros and add prototypes for new Get/Set + encodding routines. + 9/15/97 djb InitUnicodeConverter now takes a boolean. + 9/10/97 msd Add prototype for InitializeEncodingContext. + 6/26/97 DSH Include "MockConverter" prototype for DFA usage. + 6/25/97 DSH Removed Prototype definitions, and checked in Unicode.h and + TextCommon.h from Julio Gonzales into InternalInterfaces. + 6/25/97 msd Add prototypes for some new Unicode routines that haven't + appeared in MasterInterfaces yet. + 6/18/97 djb Add more ConversionContexts routines. + 6/13/97 djb Switched to ConvertUnicodeToHFSName, ConvertHFSNameToUnicode, & + CompareUnicodeNames. + 4/28/97 djb first checked in + 12/12/96 msd first checked in + +*/ +#ifndef _HFSUNICODEWRAPPERS_ +#define _HFSUNICODEWRAPPERS_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include "hfs_macos_defs.h" +#include "hfs_format.h" + + +extern OSErr ConvertUnicodeToUTF8Mangled ( ByteCount srcLen, + ConstUniCharArrayPtr srcStr, + ByteCount maxDstLen, + ByteCount *actualDstLen, + unsigned char* dstStr , + HFSCatalogNodeID cnid); + +/* + This routine compares two Unicode names based on an ordering defined by the HFS Plus B-tree. + This ordering must stay fixed for all time. 
+ + Output: + -n name1 < name2 (i.e. name 1 sorts before name 2) + 0 name1 = name2 + +n name1 > name2 + + NOTE: You should not depend on the magnitude of the result, just its sign. That is, when name1 < name2, then any + negative number may be returned. +*/ + +extern int32_t FastUnicodeCompare(register ConstUniCharArrayPtr str1, register ItemCount length1, + register ConstUniCharArrayPtr str2, register ItemCount length2); + +extern int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount length1, + register ConstUniCharArrayPtr str2, register ItemCount length2); + +extern int32_t FastRelString( ConstStr255Param str1, ConstStr255Param str2 ); + + +extern HFSCatalogNodeID GetEmbeddedFileID( ConstStr31Param filename, u_int32_t length, u_int32_t *prefixLength ); +extern u_int32_t CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length ); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* _HFSUNICODEWRAPPERS_ */ diff --git a/core/MacOSStubs.c b/core/MacOSStubs.c new file mode 100644 index 0000000..abfd208 --- /dev/null +++ b/core/MacOSStubs.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_dbg.h" +#include "FileMgrInternal.h" + +/* + * gTimeZone should only be used for HFS volumes! + * It is initialized when an HFS volume is mounted. + */ +struct timezone gTimeZone = {8*60,1}; + +/* + * GetTimeUTC - get the GMT Mac OS time (in seconds since 1/1/1904) + * + * called by the Catalog Manager when creating/updating HFS Plus records + */ +u_int32_t GetTimeUTC(void) +{ + struct timeval tv; + + microtime(&tv); + + return (tv.tv_sec + MAC_GMT_FACTOR); +} + + +/* + * LocalToUTC - convert from Mac OS local time to Mac OS GMT time. + * This should only be called for HFS volumes (not for HFS Plus). + */ +u_int32_t LocalToUTC(u_int32_t localTime) +{ + u_int32_t gtime = localTime; + + if (gtime != 0) { + gtime += (gTimeZone.tz_minuteswest * 60); + /* + * We no longer do DST adjustments here since we don't + * know if time supplied needs adjustment! 
+ * + * if (gTimeZone.tz_dsttime) + * gtime -= 3600; + */ + } + return (gtime); +} + +/* + * UTCToLocal - convert from Mac OS GMT time to Mac OS local time. + * This should only be called for HFS volumes (not for HFS Plus). + */ +u_int32_t UTCToLocal(u_int32_t utcTime) +{ + u_int32_t ltime = utcTime; + + if (ltime != 0) { + ltime -= (gTimeZone.tz_minuteswest * 60); + /* + * We no longer do DST adjustments here since we don't + * know if time supplied needs adjustment! + * + * if (gTimeZone.tz_dsttime) + * ltime += 3600; + */ + } + return (ltime); +} + +/* + * to_bsd_time - convert from Mac OS time (seconds since 1/1/1904) + * to BSD time (seconds since 1/1/1970) + */ +time_t to_bsd_time(u_int32_t hfs_time) +{ + u_int32_t gmt = hfs_time; + + if (gmt > MAC_GMT_FACTOR) + gmt -= MAC_GMT_FACTOR; + else + gmt = 0; /* don't let date go negative! */ + + return (time_t)gmt; +} + +/* + * to_hfs_time - convert from BSD time (seconds since 1/1/1970) + * to Mac OS time (seconds since 1/1/1904) + */ +u_int32_t to_hfs_time(time_t bsd_time) +{ + u_int32_t hfs_time = (u_int32_t)bsd_time; + + /* don't adjust zero - treat as uninitialzed */ + if (hfs_time != 0) + hfs_time += MAC_GMT_FACTOR; + + return (hfs_time); +} + +void +DebugStr( + const char * debuggerMsg + ) +{ + kprintf ("*** Mac OS Debugging Message: %s\n", debuggerMsg); +#if DEBUG + Debugger(debuggerMsg); +#endif +} diff --git a/core/UCStringCompareData.h b/core/UCStringCompareData.h new file mode 100644 index 0000000..7322837 --- /dev/null +++ b/core/UCStringCompareData.h @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2000-2002, 2005 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: UCStringCompareData.h + + Contains: xxx put contents here xxx + + Version: HFS Plus 1.0 + + Copyright: (c) 1997-1999 by Apple Inc., all rights reserved. + + File Ownership: + + DRI: Mark Day + + Other Contact: xxx put other contact here xxx + + Technology: xxx put technology here xxx + + Writers: + + (djb) Don Brady + (msd) Mark Day + + Change History (most recent first): + + 11/16/97 djb msd. Updated lower case table with ignorable mappings and less + aggressive case folding. Added a trailing comma to make the + StreamEdit script work right. Removed Unicode decomposition + tables. 
Case folding tables convert u+0000 to 0xFFFF so that the + NUL character can appear in names, while still allowing a zero + value to be a sentinel. (From Andy Daniels, 11/10/97) + 8/26/97 djb Tweak gLowerCaseTable to make it faster. + 8/14/97 djb Add RelString compare table... + 4/24/97 djb first checked in + 2/27/97 msd first checked in +*/ + +#ifndef _UCSTRINGCOMPAREDATA_ +#define _UCSTRINGCOMPAREDATA_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +/* + * For better performance, the case folding table for basic latin + * is seperate from the others. This eliminates the extra lookup + * to get the offset to this table. + * + * Note: 0x0000 now maps to 0 so that it will be ignored + */ +u_int16_t gLatinCaseFold[] = { + /* 0 */ 0xFFFF, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + /* 1 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + /* 2 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + /* 3 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + /* 4 */ 0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + /* 5 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + /* 6 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + /* 7 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + /* 8 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + /* 9 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + /* A */ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, + /* B */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + /* C */ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00E6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + /* D */ 0x00F0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00F8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00FE, 0x00DF, + /* E */ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + /* F */ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF, +}; + +/* The lower case table consists of a 256-entry high-byte table followed by some number of + 256-entry subtables. The high-byte table contains either an offset to the subtable for + characters with that high byte or zero, which means that there are no case mappings or + ignored characters in that block. Ignored characters are mapped to zero. 
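Taken together with gLatinCaseFold above, the two-level lookup this layout implies can be sketched as follows (an illustrative helper with an assumed name, not a routine from this file; FastUnicodeCompare in UnicodeWrappers.c performs the equivalent steps inline):

    #include <stdint.h>

    extern uint16_t gLatinCaseFold[];    /* fast path for code points below 0x0100 */
    extern uint16_t gLowerCaseTable[];   /* 256-entry high-byte table plus sub-tables */

    /* Fold one UTF-16 code unit; a result of 0 means "ignore this character". */
    static uint16_t fold_char(uint16_t c)
    {
        if (c < 0x0100)
            return gLatinCaseFold[c];

        uint16_t subtable = gLowerCaseTable[c >> 8];     /* an index, not a byte offset */
        if (subtable == 0)
            return c;                                    /* no mappings in this block */

        return gLowerCaseTable[subtable + (c & 0x00FF)];
    }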
+ */ + +u_int16_t gLowerCaseTable[] = { + + /* High-byte indices ( == 0 iff no case mapping and no ignorables ) */ + + /* 0 */ 0x0000, 0x0100, 0x0000, 0x0200, 0x0300, 0x0400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 1 */ 0x0500, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 2 */ 0x0600, 0x0700, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 3 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 4 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 5 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 6 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 7 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 9 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* A */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* B */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* C */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* D */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* E */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* F */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0800, 0x0900, + + /* Table 1 (for high byte 0x01) */ + + /* 0 */ 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x010A, 0x010B, 0x010C, 0x010D, 0x010E, 0x010F, + /* 1 */ 0x0111, 0x0111, 0x0112, 0x0113, 0x0114, 0x0115, 0x0116, 0x0117, 0x0118, 0x0119, 0x011A, 0x011B, 0x011C, 0x011D, 0x011E, 0x011F, + /* 2 */ 0x0120, 0x0121, 0x0122, 0x0123, 0x0124, 0x0125, 0x0127, 0x0127, 0x0128, 0x0129, 0x012A, 0x012B, 0x012C, 0x012D, 0x012E, 0x012F, + /* 3 */ 0x0130, 0x0131, 0x0133, 0x0133, 0x0134, 0x0135, 0x0136, 0x0137, 0x0138, 0x0139, 0x013A, 0x013B, 0x013C, 0x013D, 0x013E, 0x0140, + /* 4 */ 0x0140, 0x0142, 0x0142, 0x0143, 0x0144, 0x0145, 0x0146, 0x0147, 0x0148, 0x0149, 0x014B, 0x014B, 0x014C, 0x014D, 0x014E, 0x014F, + /* 5 */ 0x0150, 0x0151, 0x0153, 0x0153, 0x0154, 0x0155, 0x0156, 0x0157, 0x0158, 0x0159, 0x015A, 0x015B, 0x015C, 0x015D, 0x015E, 0x015F, + /* 6 */ 0x0160, 0x0161, 0x0162, 0x0163, 0x0164, 0x0165, 0x0167, 0x0167, 0x0168, 0x0169, 0x016A, 0x016B, 0x016C, 0x016D, 0x016E, 0x016F, + /* 7 */ 0x0170, 0x0171, 0x0172, 0x0173, 0x0174, 0x0175, 0x0176, 0x0177, 0x0178, 0x0179, 0x017A, 0x017B, 0x017C, 0x017D, 0x017E, 0x017F, + /* 8 */ 0x0180, 0x0253, 0x0183, 0x0183, 0x0185, 0x0185, 0x0254, 0x0188, 0x0188, 
0x0256, 0x0257, 0x018C, 0x018C, 0x018D, 0x01DD, 0x0259, + /* 9 */ 0x025B, 0x0192, 0x0192, 0x0260, 0x0263, 0x0195, 0x0269, 0x0268, 0x0199, 0x0199, 0x019A, 0x019B, 0x026F, 0x0272, 0x019E, 0x0275, + /* A */ 0x01A0, 0x01A1, 0x01A3, 0x01A3, 0x01A5, 0x01A5, 0x01A6, 0x01A8, 0x01A8, 0x0283, 0x01AA, 0x01AB, 0x01AD, 0x01AD, 0x0288, 0x01AF, + /* B */ 0x01B0, 0x028A, 0x028B, 0x01B4, 0x01B4, 0x01B6, 0x01B6, 0x0292, 0x01B9, 0x01B9, 0x01BA, 0x01BB, 0x01BD, 0x01BD, 0x01BE, 0x01BF, + /* C */ 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C6, 0x01C6, 0x01C6, 0x01C9, 0x01C9, 0x01C9, 0x01CC, 0x01CC, 0x01CC, 0x01CD, 0x01CE, 0x01CF, + /* D */ 0x01D0, 0x01D1, 0x01D2, 0x01D3, 0x01D4, 0x01D5, 0x01D6, 0x01D7, 0x01D8, 0x01D9, 0x01DA, 0x01DB, 0x01DC, 0x01DD, 0x01DE, 0x01DF, + /* E */ 0x01E0, 0x01E1, 0x01E2, 0x01E3, 0x01E5, 0x01E5, 0x01E6, 0x01E7, 0x01E8, 0x01E9, 0x01EA, 0x01EB, 0x01EC, 0x01ED, 0x01EE, 0x01EF, + /* F */ 0x01F0, 0x01F3, 0x01F3, 0x01F3, 0x01F4, 0x01F5, 0x01F6, 0x01F7, 0x01F8, 0x01F9, 0x01FA, 0x01FB, 0x01FC, 0x01FD, 0x01FE, 0x01FF, + + /* Table 2 (for high byte 0x03) */ + + /* 0 */ 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F, + /* 1 */ 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F, + /* 2 */ 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F, + /* 3 */ 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F, + /* 4 */ 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F, + /* 5 */ 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F, + /* 6 */ 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F, + /* 7 */ 0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F, + /* 8 */ 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0386, 0x0387, 0x0388, 0x0389, 0x038A, 0x038B, 0x038C, 0x038D, 0x038E, 0x038F, + /* 9 */ 0x0390, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + /* A */ 0x03C0, 0x03C1, 0x03A2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF, + /* B */ 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF, + /* C */ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x03CF, + /* D */ 0x03D0, 0x03D1, 0x03D2, 0x03D3, 0x03D4, 0x03D5, 0x03D6, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DB, 0x03DC, 0x03DD, 0x03DE, 0x03DF, + /* E */ 0x03E0, 0x03E1, 0x03E3, 0x03E3, 0x03E5, 0x03E5, 0x03E7, 0x03E7, 0x03E9, 0x03E9, 0x03EB, 0x03EB, 0x03ED, 0x03ED, 0x03EF, 0x03EF, + /* F */ 0x03F0, 0x03F1, 0x03F2, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF, + + /* Table 3 (for high byte 0x04) */ + + /* 0 */ 0x0400, 0x0401, 0x0452, 0x0403, 0x0454, 0x0455, 0x0456, 0x0407, 0x0458, 0x0459, 0x045A, 0x045B, 0x040C, 0x040D, 0x040E, 0x045F, + /* 1 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0419, 0x043A, 
0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + /* 2 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + /* 3 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + /* 4 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + /* 5 */ 0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x045D, 0x045E, 0x045F, + /* 6 */ 0x0461, 0x0461, 0x0463, 0x0463, 0x0465, 0x0465, 0x0467, 0x0467, 0x0469, 0x0469, 0x046B, 0x046B, 0x046D, 0x046D, 0x046F, 0x046F, + /* 7 */ 0x0471, 0x0471, 0x0473, 0x0473, 0x0475, 0x0475, 0x0476, 0x0477, 0x0479, 0x0479, 0x047B, 0x047B, 0x047D, 0x047D, 0x047F, 0x047F, + /* 8 */ 0x0481, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048D, 0x048E, 0x048F, + /* 9 */ 0x0491, 0x0491, 0x0493, 0x0493, 0x0495, 0x0495, 0x0497, 0x0497, 0x0499, 0x0499, 0x049B, 0x049B, 0x049D, 0x049D, 0x049F, 0x049F, + /* A */ 0x04A1, 0x04A1, 0x04A3, 0x04A3, 0x04A5, 0x04A5, 0x04A7, 0x04A7, 0x04A9, 0x04A9, 0x04AB, 0x04AB, 0x04AD, 0x04AD, 0x04AF, 0x04AF, + /* B */ 0x04B1, 0x04B1, 0x04B3, 0x04B3, 0x04B5, 0x04B5, 0x04B7, 0x04B7, 0x04B9, 0x04B9, 0x04BB, 0x04BB, 0x04BD, 0x04BD, 0x04BF, 0x04BF, + /* C */ 0x04C0, 0x04C1, 0x04C2, 0x04C4, 0x04C4, 0x04C5, 0x04C6, 0x04C8, 0x04C8, 0x04C9, 0x04CA, 0x04CC, 0x04CC, 0x04CD, 0x04CE, 0x04CF, + /* D */ 0x04D0, 0x04D1, 0x04D2, 0x04D3, 0x04D4, 0x04D5, 0x04D6, 0x04D7, 0x04D8, 0x04D9, 0x04DA, 0x04DB, 0x04DC, 0x04DD, 0x04DE, 0x04DF, + /* E */ 0x04E0, 0x04E1, 0x04E2, 0x04E3, 0x04E4, 0x04E5, 0x04E6, 0x04E7, 0x04E8, 0x04E9, 0x04EA, 0x04EB, 0x04EC, 0x04ED, 0x04EE, 0x04EF, + /* F */ 0x04F0, 0x04F1, 0x04F2, 0x04F3, 0x04F4, 0x04F5, 0x04F6, 0x04F7, 0x04F8, 0x04F9, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF, + + /* Table 4 (for high byte 0x05) */ + + /* 0 */ 0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F, + /* 1 */ 0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F, + /* 2 */ 0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F, + /* 3 */ 0x0530, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568, 0x0569, 0x056A, 0x056B, 0x056C, 0x056D, 0x056E, 0x056F, + /* 4 */ 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057A, 0x057B, 0x057C, 0x057D, 0x057E, 0x057F, + /* 5 */ 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F, + /* 6 */ 0x0560, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568, 0x0569, 0x056A, 0x056B, 0x056C, 0x056D, 0x056E, 0x056F, + /* 7 */ 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057A, 0x057B, 0x057C, 0x057D, 0x057E, 0x057F, + /* 8 */ 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F, + /* 9 */ 0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F, + /* A */ 0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF, + /* B */ 0x05B0, 
0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + /* C */ 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF, + /* D */ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + /* E */ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF, + /* F */ 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF, + + /* Table 5 (for high byte 0x10) */ + + /* 0 */ 0x1000, 0x1001, 0x1002, 0x1003, 0x1004, 0x1005, 0x1006, 0x1007, 0x1008, 0x1009, 0x100A, 0x100B, 0x100C, 0x100D, 0x100E, 0x100F, + /* 1 */ 0x1010, 0x1011, 0x1012, 0x1013, 0x1014, 0x1015, 0x1016, 0x1017, 0x1018, 0x1019, 0x101A, 0x101B, 0x101C, 0x101D, 0x101E, 0x101F, + /* 2 */ 0x1020, 0x1021, 0x1022, 0x1023, 0x1024, 0x1025, 0x1026, 0x1027, 0x1028, 0x1029, 0x102A, 0x102B, 0x102C, 0x102D, 0x102E, 0x102F, + /* 3 */ 0x1030, 0x1031, 0x1032, 0x1033, 0x1034, 0x1035, 0x1036, 0x1037, 0x1038, 0x1039, 0x103A, 0x103B, 0x103C, 0x103D, 0x103E, 0x103F, + /* 4 */ 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049, 0x104A, 0x104B, 0x104C, 0x104D, 0x104E, 0x104F, + /* 5 */ 0x1050, 0x1051, 0x1052, 0x1053, 0x1054, 0x1055, 0x1056, 0x1057, 0x1058, 0x1059, 0x105A, 0x105B, 0x105C, 0x105D, 0x105E, 0x105F, + /* 6 */ 0x1060, 0x1061, 0x1062, 0x1063, 0x1064, 0x1065, 0x1066, 0x1067, 0x1068, 0x1069, 0x106A, 0x106B, 0x106C, 0x106D, 0x106E, 0x106F, + /* 7 */ 0x1070, 0x1071, 0x1072, 0x1073, 0x1074, 0x1075, 0x1076, 0x1077, 0x1078, 0x1079, 0x107A, 0x107B, 0x107C, 0x107D, 0x107E, 0x107F, + /* 8 */ 0x1080, 0x1081, 0x1082, 0x1083, 0x1084, 0x1085, 0x1086, 0x1087, 0x1088, 0x1089, 0x108A, 0x108B, 0x108C, 0x108D, 0x108E, 0x108F, + /* 9 */ 0x1090, 0x1091, 0x1092, 0x1093, 0x1094, 0x1095, 0x1096, 0x1097, 0x1098, 0x1099, 0x109A, 0x109B, 0x109C, 0x109D, 0x109E, 0x109F, + /* A */ 0x10D0, 0x10D1, 0x10D2, 0x10D3, 0x10D4, 0x10D5, 0x10D6, 0x10D7, 0x10D8, 0x10D9, 0x10DA, 0x10DB, 0x10DC, 0x10DD, 0x10DE, 0x10DF, + /* B */ 0x10E0, 0x10E1, 0x10E2, 0x10E3, 0x10E4, 0x10E5, 0x10E6, 0x10E7, 0x10E8, 0x10E9, 0x10EA, 0x10EB, 0x10EC, 0x10ED, 0x10EE, 0x10EF, + /* C */ 0x10F0, 0x10F1, 0x10F2, 0x10F3, 0x10F4, 0x10F5, 0x10C6, 0x10C7, 0x10C8, 0x10C9, 0x10CA, 0x10CB, 0x10CC, 0x10CD, 0x10CE, 0x10CF, + /* D */ 0x10D0, 0x10D1, 0x10D2, 0x10D3, 0x10D4, 0x10D5, 0x10D6, 0x10D7, 0x10D8, 0x10D9, 0x10DA, 0x10DB, 0x10DC, 0x10DD, 0x10DE, 0x10DF, + /* E */ 0x10E0, 0x10E1, 0x10E2, 0x10E3, 0x10E4, 0x10E5, 0x10E6, 0x10E7, 0x10E8, 0x10E9, 0x10EA, 0x10EB, 0x10EC, 0x10ED, 0x10EE, 0x10EF, + /* F */ 0x10F0, 0x10F1, 0x10F2, 0x10F3, 0x10F4, 0x10F5, 0x10F6, 0x10F7, 0x10F8, 0x10F9, 0x10FA, 0x10FB, 0x10FC, 0x10FD, 0x10FE, 0x10FF, + + /* Table 6 (for high byte 0x20) */ + + /* 0 */ 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x0000, 0x0000, 0x0000, 0x0000, + /* 1 */ 0x2010, 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, 0x2016, 0x2017, 0x2018, 0x2019, 0x201A, 0x201B, 0x201C, 0x201D, 0x201E, 0x201F, + /* 2 */ 0x2020, 0x2021, 0x2022, 0x2023, 0x2024, 0x2025, 0x2026, 0x2027, 0x2028, 0x2029, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x202F, + /* 3 */ 0x2030, 0x2031, 0x2032, 0x2033, 0x2034, 0x2035, 0x2036, 0x2037, 0x2038, 0x2039, 0x203A, 0x203B, 0x203C, 0x203D, 0x203E, 0x203F, + /* 4 */ 0x2040, 0x2041, 0x2042, 
0x2043, 0x2044, 0x2045, 0x2046, 0x2047, 0x2048, 0x2049, 0x204A, 0x204B, 0x204C, 0x204D, 0x204E, 0x204F, + /* 5 */ 0x2050, 0x2051, 0x2052, 0x2053, 0x2054, 0x2055, 0x2056, 0x2057, 0x2058, 0x2059, 0x205A, 0x205B, 0x205C, 0x205D, 0x205E, 0x205F, + /* 6 */ 0x2060, 0x2061, 0x2062, 0x2063, 0x2064, 0x2065, 0x2066, 0x2067, 0x2068, 0x2069, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 7 */ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x207A, 0x207B, 0x207C, 0x207D, 0x207E, 0x207F, + /* 8 */ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, 0x2088, 0x2089, 0x208A, 0x208B, 0x208C, 0x208D, 0x208E, 0x208F, + /* 9 */ 0x2090, 0x2091, 0x2092, 0x2093, 0x2094, 0x2095, 0x2096, 0x2097, 0x2098, 0x2099, 0x209A, 0x209B, 0x209C, 0x209D, 0x209E, 0x209F, + /* A */ 0x20A0, 0x20A1, 0x20A2, 0x20A3, 0x20A4, 0x20A5, 0x20A6, 0x20A7, 0x20A8, 0x20A9, 0x20AA, 0x20AB, 0x20AC, 0x20AD, 0x20AE, 0x20AF, + /* B */ 0x20B0, 0x20B1, 0x20B2, 0x20B3, 0x20B4, 0x20B5, 0x20B6, 0x20B7, 0x20B8, 0x20B9, 0x20BA, 0x20BB, 0x20BC, 0x20BD, 0x20BE, 0x20BF, + /* C */ 0x20C0, 0x20C1, 0x20C2, 0x20C3, 0x20C4, 0x20C5, 0x20C6, 0x20C7, 0x20C8, 0x20C9, 0x20CA, 0x20CB, 0x20CC, 0x20CD, 0x20CE, 0x20CF, + /* D */ 0x20D0, 0x20D1, 0x20D2, 0x20D3, 0x20D4, 0x20D5, 0x20D6, 0x20D7, 0x20D8, 0x20D9, 0x20DA, 0x20DB, 0x20DC, 0x20DD, 0x20DE, 0x20DF, + /* E */ 0x20E0, 0x20E1, 0x20E2, 0x20E3, 0x20E4, 0x20E5, 0x20E6, 0x20E7, 0x20E8, 0x20E9, 0x20EA, 0x20EB, 0x20EC, 0x20ED, 0x20EE, 0x20EF, + /* F */ 0x20F0, 0x20F1, 0x20F2, 0x20F3, 0x20F4, 0x20F5, 0x20F6, 0x20F7, 0x20F8, 0x20F9, 0x20FA, 0x20FB, 0x20FC, 0x20FD, 0x20FE, 0x20FF, + + /* Table 7 (for high byte 0x21) */ + + /* 0 */ 0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F, + /* 1 */ 0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F, + /* 2 */ 0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F, + /* 3 */ 0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F, + /* 4 */ 0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F, + /* 5 */ 0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F, + /* 6 */ 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x217A, 0x217B, 0x217C, 0x217D, 0x217E, 0x217F, + /* 7 */ 0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x217A, 0x217B, 0x217C, 0x217D, 0x217E, 0x217F, + /* 8 */ 0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F, + /* 9 */ 0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F, + /* A */ 0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF, + /* B */ 0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF, + /* C */ 0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF, + /* D */ 0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 
0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF, + /* E */ 0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF, + /* F */ 0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF, + + /* Table 8 (for high byte 0xFE) */ + + /* 0 */ 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07, 0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, + /* 1 */ 0xFE10, 0xFE11, 0xFE12, 0xFE13, 0xFE14, 0xFE15, 0xFE16, 0xFE17, 0xFE18, 0xFE19, 0xFE1A, 0xFE1B, 0xFE1C, 0xFE1D, 0xFE1E, 0xFE1F, + /* 2 */ 0xFE20, 0xFE21, 0xFE22, 0xFE23, 0xFE24, 0xFE25, 0xFE26, 0xFE27, 0xFE28, 0xFE29, 0xFE2A, 0xFE2B, 0xFE2C, 0xFE2D, 0xFE2E, 0xFE2F, + /* 3 */ 0xFE30, 0xFE31, 0xFE32, 0xFE33, 0xFE34, 0xFE35, 0xFE36, 0xFE37, 0xFE38, 0xFE39, 0xFE3A, 0xFE3B, 0xFE3C, 0xFE3D, 0xFE3E, 0xFE3F, + /* 4 */ 0xFE40, 0xFE41, 0xFE42, 0xFE43, 0xFE44, 0xFE45, 0xFE46, 0xFE47, 0xFE48, 0xFE49, 0xFE4A, 0xFE4B, 0xFE4C, 0xFE4D, 0xFE4E, 0xFE4F, + /* 5 */ 0xFE50, 0xFE51, 0xFE52, 0xFE53, 0xFE54, 0xFE55, 0xFE56, 0xFE57, 0xFE58, 0xFE59, 0xFE5A, 0xFE5B, 0xFE5C, 0xFE5D, 0xFE5E, 0xFE5F, + /* 6 */ 0xFE60, 0xFE61, 0xFE62, 0xFE63, 0xFE64, 0xFE65, 0xFE66, 0xFE67, 0xFE68, 0xFE69, 0xFE6A, 0xFE6B, 0xFE6C, 0xFE6D, 0xFE6E, 0xFE6F, + /* 7 */ 0xFE70, 0xFE71, 0xFE72, 0xFE73, 0xFE74, 0xFE75, 0xFE76, 0xFE77, 0xFE78, 0xFE79, 0xFE7A, 0xFE7B, 0xFE7C, 0xFE7D, 0xFE7E, 0xFE7F, + /* 8 */ 0xFE80, 0xFE81, 0xFE82, 0xFE83, 0xFE84, 0xFE85, 0xFE86, 0xFE87, 0xFE88, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C, 0xFE8D, 0xFE8E, 0xFE8F, + /* 9 */ 0xFE90, 0xFE91, 0xFE92, 0xFE93, 0xFE94, 0xFE95, 0xFE96, 0xFE97, 0xFE98, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C, 0xFE9D, 0xFE9E, 0xFE9F, + /* A */ 0xFEA0, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8, 0xFEA9, 0xFEAA, 0xFEAB, 0xFEAC, 0xFEAD, 0xFEAE, 0xFEAF, + /* B */ 0xFEB0, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC, 0xFEBD, 0xFEBE, 0xFEBF, + /* C */ 0xFEC0, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8, 0xFEC9, 0xFECA, 0xFECB, 0xFECC, 0xFECD, 0xFECE, 0xFECF, + /* D */ 0xFED0, 0xFED1, 0xFED2, 0xFED3, 0xFED4, 0xFED5, 0xFED6, 0xFED7, 0xFED8, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC, 0xFEDD, 0xFEDE, 0xFEDF, + /* E */ 0xFEE0, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC, 0xFEED, 0xFEEE, 0xFEEF, + /* F */ 0xFEF0, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4, 0xFEF5, 0xFEF6, 0xFEF7, 0xFEF8, 0xFEF9, 0xFEFA, 0xFEFB, 0xFEFC, 0xFEFD, 0xFEFE, 0x0000, + + /* Table 9 (for high byte 0xFF) */ + + /* 0 */ 0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F, + /* 1 */ 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F, + /* 2 */ 0xFF20, 0xFF41, 0xFF42, 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, + /* 3 */ 0xFF50, 0xFF51, 0xFF52, 0xFF53, 0xFF54, 0xFF55, 0xFF56, 0xFF57, 0xFF58, 0xFF59, 0xFF5A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F, + /* 4 */ 0xFF40, 0xFF41, 0xFF42, 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, + /* 5 */ 0xFF50, 0xFF51, 0xFF52, 0xFF53, 0xFF54, 0xFF55, 0xFF56, 0xFF57, 0xFF58, 0xFF59, 0xFF5A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F, + /* 6 */ 0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 
0xFF6D, 0xFF6E, 0xFF6F, + /* 7 */ 0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F, + /* 8 */ 0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F, + /* 9 */ 0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F, + /* A */ 0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF, + /* B */ 0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF, + /* C */ 0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF, + /* D */ 0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF, + /* E */ 0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF, + /* F */ 0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF, +}; + + +/* RelString case folding table */ + +unsigned short gCompareTable[] = { + + /* 0 */ 0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700, 0x0800, 0x0900, 0x0A00, 0x0B00, 0x0C00, 0x0D00, 0x0E00, 0x0F00, + /* 1 */ 0x1000, 0x1100, 0x1200, 0x1300, 0x1400, 0x1500, 0x1600, 0x1700, 0x1800, 0x1900, 0x1A00, 0x1B00, 0x1C00, 0x1D00, 0x1E00, 0x1F00, + /* 2 */ 0x2000, 0x2100, 0x2200, 0x2300, 0x2400, 0x2500, 0x2600, 0x2700, 0x2800, 0x2900, 0x2A00, 0x2B00, 0x2C00, 0x2D00, 0x2E00, 0x2F00, + /* 3 */ 0x3000, 0x3100, 0x3200, 0x3300, 0x3400, 0x3500, 0x3600, 0x3700, 0x3800, 0x3900, 0x3A00, 0x3B00, 0x3C00, 0x3D00, 0x3E00, 0x3F00, + /* 4 */ 0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00, 0x4B00, 0x4C00, 0x4D00, 0x4E00, 0x4F00, + /* 5 */ 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x5C00, 0x5D00, 0x5E00, 0x5F00, + + // 0x60 maps to 'a' + // range 0x61 to 0x7a ('a' to 'z') map to upper case + + /* 6 */ 0x4180, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00, 0x4B00, 0x4C00, 0x4D00, 0x4E00, 0x4F00, + /* 7 */ 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x7B00, 0x7C00, 0x7D00, 0x7E00, 0x7F00, + + // range 0x80 to 0xd8 gets mapped... 
+ + /* 8 */ 0x4108, 0x410C, 0x4310, 0x4502, 0x4E0A, 0x4F08, 0x5508, 0x4182, 0x4104, 0x4186, 0x4108, 0x410A, 0x410C, 0x4310, 0x4502, 0x4584, + /* 9 */ 0x4586, 0x4588, 0x4982, 0x4984, 0x4986, 0x4988, 0x4E0A, 0x4F82, 0x4F84, 0x4F86, 0x4F08, 0x4F0A, 0x5582, 0x5584, 0x5586, 0x5508, + /* A */ 0xA000, 0xA100, 0xA200, 0xA300, 0xA400, 0xA500, 0xA600, 0x5382, 0xA800, 0xA900, 0xAA00, 0xAB00, 0xAC00, 0xAD00, 0x4114, 0x4F0E, + /* B */ 0xB000, 0xB100, 0xB200, 0xB300, 0xB400, 0xB500, 0xB600, 0xB700, 0xB800, 0xB900, 0xBA00, 0x4192, 0x4F92, 0xBD00, 0x4114, 0x4F0E, + /* C */ 0xC000, 0xC100, 0xC200, 0xC300, 0xC400, 0xC500, 0xC600, 0x2206, 0x2208, 0xC900, 0x2000, 0x4104, 0x410A, 0x4F0A, 0x4F14, 0x4F14, + /* D */ 0xD000, 0xD100, 0x2202, 0x2204, 0x2702, 0x2704, 0xD600, 0xD700, 0x5988, 0xD900, 0xDA00, 0xDB00, 0xDC00, 0xDD00, 0xDE00, 0xDF00, + + /* E */ 0xE000, 0xE100, 0xE200, 0xE300, 0xE400, 0xE500, 0xE600, 0xE700, 0xE800, 0xE900, 0xEA00, 0xEB00, 0xEC00, 0xED00, 0xEE00, 0xEF00, + /* F */ 0xF000, 0xF100, 0xF200, 0xF300, 0xF400, 0xF500, 0xF600, 0xF700, 0xF800, 0xF900, 0xFA00, 0xFB00, 0xFC00, 0xFD00, 0xFE00, 0xFF00, + +}; +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* _UCSTRINGCOMPAREDATA_ */ diff --git a/core/UnicodeWrappers.c b/core/UnicodeWrappers.c new file mode 100644 index 0000000..8e5b6e6 --- /dev/null +++ b/core/UnicodeWrappers.c @@ -0,0 +1,508 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: UnicodeWrappers.c + + Contains: Wrapper routines for Unicode conversion and comparison. 
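Two of the wrappers declared in HFSUnicodeWrappers.h and defined below, ConvertUnicodeToUTF8Mangled() and GetEmbeddedFileID(), cooperate on the "#<hex file ID>" name-mangling scheme. As a worked example (the name is hypothetical): a long Unicode name that cannot be represented verbatim might be emitted as "SomeVeryLongPrefix#2A53F.txt", where 0x2A53F is the file's catalog node ID formatted with "#%X" and ".txt" is the preserved extension. GetEmbeddedFileID() on that string skips the extension, scans backward over the hex digits to the '#', sets *prefixLength to 18 (the length of "SomeVeryLongPrefix"), and returns 0x2A53F. Names shorter than 28 characters are never treated as mangled; this example is exactly 28 characters long.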
+ +*/ + +#include +#include + +#include "hfs_macos_defs.h" +#include "UCStringCompareData.h" + +#include "FileMgrInternal.h" +#include "HFSUnicodeWrappers.h" + +enum { + kMinFileExtensionChars = 1, /* does not include dot */ + kMaxFileExtensionChars = 5 /* does not include dot */ +}; + + +#define EXTENSIONCHAR(c) (((c) >= 0x61 && (c) <= 0x7A) || \ + ((c) >= 0x41 && (c) <= 0x5A) || \ + ((c) >= 0x30 && (c) <= 0x39)) + + +#define IsHexDigit(c) (((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9') || \ + ((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F')) + + +static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr ); + + +static u_int32_t HexStringToInteger( u_int32_t length, const u_int8_t *hexStr ); + + +/* + * Get filename extension (if any) as a C string + */ +static void +GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr) +{ + u_int32_t i; + UniChar c; + u_int16_t extChars; /* number of extension chars (excluding dot) */ + u_int16_t maxExtChars; + Boolean foundExtension; + + extStr[0] = '\0'; /* assume there's no extension */ + + if ( length < 3 ) + return; /* "x.y" is smallest possible extension */ + + if ( length < (kMaxFileExtensionChars + 2) ) + maxExtChars = length - 2; /* save room for prefix + dot */ + else + maxExtChars = kMaxFileExtensionChars; + + i = length; + extChars = 0; + foundExtension = false; + + while ( extChars <= maxExtChars ) { + c = unicodeStr[--i]; + + /* look for leading dot */ + if ( c == (UniChar) '.' ) { + if ( extChars > 0 ) /* cannot end with a dot */ + foundExtension = true; + break; + } + + if ( EXTENSIONCHAR(c) ) + ++extChars; + else + break; + } + + /* if we found one then copy it */ + if ( foundExtension ) { + u_int8_t *extStrPtr = (u_int8_t *)extStr; + const UniChar *unicodeStrPtr = &unicodeStr[i]; + + for ( i = 0; i <= extChars; ++i ) + *(extStrPtr++) = (u_int8_t) *(unicodeStrPtr++); + extStr[extChars + 1] = '\0'; /* terminate extension + dot */ + } +} + + + +/* + * Count filename extension characters (if any) + */ +u_int32_t +CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length ) +{ + u_int32_t i; + UniChar c; + u_int32_t extChars; /* number of extension chars (excluding dot) */ + u_int16_t maxExtChars; + Boolean foundExtension; + + if ( length < 3 ) + return 0; /* "x.y" is smallest possible extension */ + + if ( length < (kMaxFileExtensionChars + 2) ) + maxExtChars = length - 2; /* save room for prefix + dot */ + else + maxExtChars = kMaxFileExtensionChars; + + extChars = 0; /* assume there's no extension */ + i = length - 1; /* index to last ascii character */ + foundExtension = false; + + while ( extChars <= maxExtChars ) { + c = filename[i--]; + + /* look for leading dot */ + if ( c == (u_int8_t) '.' ) { + if ( extChars > 0 ) /* cannot end with a dot */ + return (extChars); + + break; + } + + if ( EXTENSIONCHAR(c) ) + ++extChars; + else + break; + } + + return 0; +} + + +/* + * extract the file id from a mangled name + */ +HFSCatalogNodeID +GetEmbeddedFileID(const unsigned char * filename, u_int32_t length, u_int32_t *prefixLength) +{ + short extChars; + short i; + u_int8_t c; + + *prefixLength = 0; + + if ( filename == NULL ) + return 0; + + if ( length < 28 ) + return 0; /* too small to have been mangled */ + + /* big enough for a file ID (#10) and an extension (.x) ? 
*/ + if ( length > 5 ) + extChars = CountFilenameExtensionChars(filename, length); + else + extChars = 0; + + /* skip over dot plus extension characters */ + if ( extChars > 0 ) + length -= (extChars + 1); + + /* scan for file id digits */ + for ( i = length - 1; i >= 0; --i) { + c = filename[i]; + + /* look for file ID marker */ + if ( c == '#' ) { + if ( (length - i) < 3 ) + break; /* too small to be a file ID */ + + *prefixLength = i; + return HexStringToInteger(length - i - 1, &filename[i+1]); + } + + if ( !IsHexDigit(c) ) + break; /* file ID string must have hex digits */ + } + + return 0; +} + + + +static u_int32_t +HexStringToInteger(u_int32_t length, const u_int8_t *hexStr) +{ + u_int32_t value; + u_int32_t i; + u_int8_t c; + const u_int8_t *p; + + value = 0; + p = hexStr; + + for ( i = 0; i < length; ++i ) { + c = *p++; + + if (c >= '0' && c <= '9') { + value = value << 4; + value += (u_int32_t) c - (u_int32_t) '0'; + } else if (c >= 'A' && c <= 'F') { + value = value << 4; + value += 10 + ((unsigned int) c - (unsigned int) 'A'); + } else { + return 0; /* bad character */ + } + } + + return value; +} + + +/* + * Routine: FastRelString + * + * Output: returns -1 if str1 < str2 + * returns 1 if str1 > str2 + * return 0 if equal + * + */ +int32_t FastRelString( ConstStr255Param str1, ConstStr255Param str2 ) +{ + u_int16_t* compareTable; + int32_t bestGuess; + u_int8_t length, length2; + u_int8_t delta; + + delta = 0; + length = *(str1++); + length2 = *(str2++); + + if (length == length2) + bestGuess = 0; + else if (length < length2) + { + bestGuess = -1; + delta = length2 - length; + } + else + { + bestGuess = 1; + length = length2; + } + + compareTable = (u_int16_t*) gCompareTable; + + while (length--) + { + u_int8_t aChar, bChar; + + aChar = *(str1++); + bChar = *(str2++); + + if (aChar != bChar) // If they don't match exacly, do case conversion + { + u_int16_t aSortWord, bSortWord; + + aSortWord = compareTable[aChar]; + bSortWord = compareTable[bChar]; + + if (aSortWord > bSortWord) + return 1; + + if (aSortWord < bSortWord) + return -1; + } + + // If characters match exactly, then go on to next character immediately without + // doing any extra work. + } + + // if you got to here, then return bestGuess + return bestGuess; +} + + + +// +// FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering +// +// IF RESULT +// -------------------------- +// str1 < str2 => -1 +// str1 = str2 => 0 +// str1 > str2 => +1 +// +// The lower case table starts with 256 entries (one for each of the upper bytes +// of the original Unicode char). If that entry is zero, then all characters with +// that upper byte are already case folded. If the entry is non-zero, then it is +// the _index_ (not byte offset) of the start of the sub-table for the characters +// with that upper byte. All ignorable characters are folded to the value zero. +// +// In pseudocode: +// +// Let c = source Unicode character +// Let table[] = lower case table +// +// lower = table[highbyte(c)] +// if (lower == 0) +// lower = c +// else +// lower = table[lower+lowbyte(c)] +// +// if (lower == 0) +// ignore this character +// +// To handle ignorable characters, we now need a loop to find the next valid character. +// Also, we can't pre-compute the number of characters to compare; the string length might +// be larger than the number of non-ignorable characters. Further, we must be able to handle +// ignorable characters at any point in the string, including as the first or last characters. 
+// We use a zero value as a sentinel to detect both end-of-string and ignorable characters. +// Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename, +// the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is +// an invalid Unicode character). +// +// Pseudocode: +// +// while (1) { +// c1 = GetNextValidChar(str1) // returns zero if at end of string +// c2 = GetNextValidChar(str2) +// +// if (c1 != c2) break // found a difference +// +// if (c1 == 0) // reached end of string on both strings at once? +// return 0; // yes, so strings are equal +// } +// +// // When we get here, c1 != c2. So, we just need to determine which one is less. +// if (c1 < c2) +// return -1; +// else +// return 1; +// + +int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1, + register ConstUniCharArrayPtr str2, register ItemCount length2) +{ + register u_int16_t c1,c2; + register u_int16_t temp; + register u_int16_t* lowerCaseTable; + + lowerCaseTable = (u_int16_t*) gLowerCaseTable; + + while (1) { + /* Set default values for c1, c2 in case there are no more valid chars */ + c1 = 0; + c2 = 0; + + /* Find next non-ignorable char from str1, or zero if no more */ + while (length1 && c1 == 0) { + c1 = *(str1++); + --length1; + /* check for basic latin first */ + if (c1 < 0x0100) { + c1 = gLatinCaseFold[c1]; + break; + } + /* case fold if neccessary */ + if ((temp = lowerCaseTable[c1>>8]) != 0) + c1 = lowerCaseTable[temp + (c1 & 0x00FF)]; + } + + + /* Find next non-ignorable char from str2, or zero if no more */ + while (length2 && c2 == 0) { + c2 = *(str2++); + --length2; + /* check for basic latin first */ + if (c2 < 0x0100) { + c2 = gLatinCaseFold[c2]; + break; + } + /* case fold if neccessary */ + if ((temp = lowerCaseTable[c2>>8]) != 0) + c2 = lowerCaseTable[temp + (c2 & 0x00FF)]; + } + + if (c1 != c2) // found a difference, so stop looping + break; + + if (c1 == 0) // did we reach the end of both strings at the same time? + return 0; // yes, so strings are equal + } + + if (c1 < c2) + return -1; + else + return 1; +} + +/* + * UnicodeBinaryCompare + * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them. + * + * Results are emitted like FastUnicodeCompare: + * + * + * IF RESULT + * -------------------------- + * str1 < str2 => -1 + * str1 = str2 => 0 + * str1 > str2 => +1 + * + * The case matching source code is greatly simplified due to the lack of case-folding + * in this comparison routine. We compare, in order: the lengths, then do character-by- + * character comparisons. 
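To make the two comparators' contracts concrete, a minimal sketch (assuming the declarations from HFSUnicodeWrappers.h are in scope; UniChar is a 16-bit UTF-16 code unit):

    #include <stdio.h>

    /* "README" and "readme" compare equal under the case-folding ordering,
       but differ under binary comparison ('R' 0x0052 < 'r' 0x0072). */
    static const UniChar kUpper[6] = { 'R', 'E', 'A', 'D', 'M', 'E' };
    static const UniChar kLower[6] = { 'r', 'e', 'a', 'd', 'm', 'e' };

    static void compare_demo(void)
    {
        printf("folded = %d\n", (int)FastUnicodeCompare(kUpper, 6, kLower, 6));   /* 0  */
        printf("binary = %d\n", (int)UnicodeBinaryCompare(kUpper, 6, kLower, 6)); /* -1 */
    }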
+ * + */ +int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1, + register ConstUniCharArrayPtr str2, register ItemCount len2) { + uint16_t c1; + uint16_t c2; + int string_length; + int32_t result = 0; + + /* Set default values for the two character pointers */ + c1 = 0; + c2 = 0; + + /* First generate the string length (for comparison purposes) */ + if (len1 < len2) { + string_length = len1; + --result; + } + else if (len1 > len2) { + string_length = len2; + ++result; + } + else { + string_length = len1; + } + + /* now compare the two string pointers */ + while (string_length--) { + c1 = *(str1++); + c2 = *(str2++); + + if (c1 > c2) { + result = 1; + break; + } + + if (c1 < c2) { + result = -1; + break; + } + /* If equal, iterate to the next two respective chars */ + } + + return result; +} + + +OSErr +ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen, + ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid) +{ + ByteCount subMaxLen; + size_t utf8len; + char fileIDStr[15]; + char extStr[15]; + + snprintf(fileIDStr, sizeof(fileIDStr), "#%X", cnid); + GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr); + + /* remove extension chars from source */ + srcLen -= strlen(extStr) * sizeof(UniChar); + subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr)); + + (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0); + + strlcat((char *)dstStr, fileIDStr, maxDstLen); + strlcat((char *)dstStr, extStr, maxDstLen); + *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr)); + + return noErr; +} diff --git a/core/VolumeAllocation.c b/core/VolumeAllocation.c new file mode 100644 index 0000000..f26811c --- /dev/null +++ b/core/VolumeAllocation.c @@ -0,0 +1,6198 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + File: VolumeAllocation.c + + Contains: Routines for accessing and modifying the volume bitmap. + + Version: HFS Plus 1.0 + + Copyright: (c) 1996-2009 by Apple Inc., all rights reserved. + +*/ + +/* +Public routines: + BlockAllocate / hfs_block_alloc + Allocate space on a volume. Can allocate space contiguously. 
+ If not contiguous, then allocation may be less than what was + asked for. Returns the starting block number, and number of + blocks. It will only return a single extent. + + BlockDeallocate + Deallocate a contiguous run of allocation blocks. + + BlockMarkAllocated + Exported wrapper to mark blocks as in-use. This will correctly determine + whether or not the red-black tree is enabled and call the appropriate function + if applicable. + BlockMarkFree + Exported wrapper to mark blocks as freed. This will correctly determine whether or + not the red-black tree is enabled and call the appropriate function if applicable. + + + ResetVCBFreeExtCache + Since the red-black tree obviates the need to maintain the free extent cache, we do + not update it if the tree is also live. As a result, if we ever need to destroy the trees + we should reset the free extent cache so it doesn't confuse us when we need to fall back to the + bitmap scanning allocator. + We also reset and disable the free extent cache when volume resizing is + in flight. + + UpdateAllocLimit + Adjusts the AllocLimit field in the hfs mount point. This is used when we need to prevent + allocations from occupying space in the region we are modifying during a filesystem resize. + At other times, it should be consistent with the total number of allocation blocks in the + filesystem. It is also used to shrink or grow the number of blocks that the red-black tree should + know about. If growing, scan the new range of bitmap, and if shrinking, reduce the + number of items in the tree that we can allocate from. + + ScanUnmapBlocks + Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it + tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core + summary table of the allocation bitmap. + +Internal routines: + BlockMarkFreeInternal + Mark a contiguous range of blocks as free. The corresponding + bits in the volume bitmap will be cleared. This will actually do the work + of modifying the bitmap for us. + + BlockMarkAllocatedInternal + Mark a contiguous range of blocks as allocated. The cor- + responding bits in the volume bitmap are set. Also tests to see + if any of the blocks were previously unallocated. + BlockFindContiguous + Find a contiguous range of blocks of a given size. The caller + specifies where to begin the search (by block number). The + block number of the first block in the range is returned. This is only + called by the bitmap scanning logic as the red-black tree should be able + to do this internally by searching its tree. + BlockFindAny + Find and allocate a contiguous range of blocks up to a given size. The + first range of contiguous free blocks found are allocated, even if there + are fewer blocks than requested (and even if a contiguous range of blocks + of the given size exists elsewhere). + BlockFindAnyBitmap + Finds a range of blocks per the above requirements without using the + Allocation RB Tree. This relies on the bitmap-scanning logic in order to find + any valid range of free space needed. + BlockFindContig + Find a contiguous range of blocks of a given size. + If the minimum cannot be satisfied, nothing is + returned. + BlockFindKnown + Try to allocate space from known free space in the volume's + free extent cache. + ReadBitmapBlock + Given an allocation block number, read the bitmap block that + contains that allocation block into a caller-supplied buffer. + + ReleaseBitmapBlock + Release a bitmap block back into the buffer cache. 
+
+ ReadBitmapRange
+ Given an allocation block number, read a range of bitmap that
+ must begin at that allocation block into a caller-supplied buffer.
+
+ ReleaseBitmapRange
+ Release and invalidate a buf_t corresponding to the bitmap
+ back into the UBC in order to prevent coherency issues.
+
+ remove_free_extent_cache
+ Remove an extent from the free extent cache. Handles overlaps
+ with multiple extents in the cache, and handles splitting an
+ extent in the cache if the extent to be removed is in the middle
+ of a cached extent.
+
+ add_free_extent_cache
+ Add an extent to the free extent cache. It will merge the
+ input extent with extents already in the cache.
+ CheckUnmappedBytes
+ Check whether or not the current transaction
+ has allocated blocks that were recently freed. This may have data safety implications.
+
+
+
+Debug/Test Routines
+ hfs_isallocated
+ Test to see if any blocks in a range are allocated. Journal or
+ allocation file lock must be held.
+
+ hfs_isallocated_scan
+ Test to see if any blocks in a range are allocated. Releases and
+ invalidates the block used when finished.
+
+Optimization Routines
+ hfs_alloc_scan_block
+ Given a starting allocation block number, figures out which physical block contains that
+ allocation block's bit, and scans it from the starting bit until either the ending bit or
+ the end of the block. Free space extents are inserted into the appropriate red-black tree.
+
+*/
+
+
+#include <sys/types.h>
+#include <sys/buf.h>
+
+#if !HFS_ALLOC_TEST
+
+#include "hfs_macos_defs.h"
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+/* For VM Page size */
+#include <libkern/libkern.h>
+#include "hfs_journal.h"
+#include "hfs.h"
+#include "hfs_endian.h"
+#include "FileMgrInternal.h"
+
+#endif // !HFS_ALLOC_TEST
+
+#include <sys/sysctl.h>
+#include <sys/disk.h>
+#include <sys/uio.h>
+#include <uuid/uuid.h>
+
+#include "hfs_dbg.h"
+#include "hfs_format.h"
+#include "hfs_kdebug.h"
+#include "rangelist.h"
+#include "hfs_extents.h"
+
+/* Headers for unmap-on-mount support */
+#include <sys/disk.h>
+
+/*
+ * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
+ * KERNEL_DEBUG_CONSTANT events are enabled at runtime. (They're
+ * disabled by default because there can be a lot of these events,
+ * and we don't want to overwhelm the kernel debug buffer. If you
+ * want to watch these events in particular, just set the sysctl.)
+ */
+static int hfs_kdebug_allocation = 0;
+SYSCTL_DECL(_vfs_generic);
+HFS_SYSCTL(NODE, _vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system")
+HFS_SYSCTL(NODE, _vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug")
+HFS_SYSCTL(INT, _vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations")
+enum {
+ /*
+ * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
+ * BlockDeallocate, including the internal BlockAllocateXxx
+ * routines so we can see how an allocation was satisfied.
+ *
+ * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
+ * free extent cache.
+ *
+ * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
+ *
+ * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
+ * or clearing bits, scanning the bitmap).
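+ *
+ * (Editorial illustration, not part of the original source: the sysctl is a
+ * plain integer bitmask built from the HFSDBG_*_ENABLED values defined just
+ * below, so enabling allocation and trim-list logging at runtime would look
+ * roughly like
+ *
+ *     sysctl vfs.generic.hfs.kdebug.allocation=5
+ *
+ * run as root, where 5 == HFSDBG_ALLOC_ENABLED | HFSDBG_UNMAP_ENABLED;
+ * setting the value back to 0 turns the extra kdebug events off again.)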
+ */ + HFSDBG_ALLOC_ENABLED = 1, + HFSDBG_EXT_CACHE_ENABLED = 2, + HFSDBG_UNMAP_ENABLED = 4, + HFSDBG_BITMAP_ENABLED = 8 +}; + +enum { + kBytesPerWord = 4, + kBitsPerByte = 8, + kBitsPerWord = 32, + + kBitsWithinWordMask = kBitsPerWord-1 +}; + +#define kLowBitInWordMask 0x00000001ul +#define kHighBitInWordMask 0x80000000ul +#define kAllBitsSetInWord 0xFFFFFFFFul + +#define HFS_MIN_SUMMARY_BLOCKSIZE 4096 + +#define ALLOC_DEBUG 0 + +static OSErr ReadBitmapBlock( + ExtendedVCB *vcb, + u_int32_t bit, + u_int32_t **buffer, + uintptr_t *blockRef, + hfs_block_alloc_flags_t flags); + +static OSErr ReleaseBitmapBlock( + ExtendedVCB *vcb, + uintptr_t blockRef, + Boolean dirty); + +static OSErr hfs_block_alloc_int(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap); + +static OSErr BlockFindAny( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); + +static OSErr BlockFindAnyBitmap( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); + +static OSErr BlockFindContig( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); + +static OSErr BlockFindContiguous( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + Boolean useMetaZone, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks, + hfs_block_alloc_flags_t flags); + +static OSErr BlockFindKnown( + ExtendedVCB *vcb, + u_int32_t maxBlocks, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); + +static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + uint32_t max_blocks, + hfs_block_alloc_flags_t flags); + +static OSErr BlockMarkAllocatedInternal ( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags); + +static OSErr BlockMarkFreeInternal( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t numBlocks, + Boolean do_validate); + + +static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize, + uint32_t **buffer, struct buf **blockRef); + +static OSErr ReleaseScanBitmapRange( struct buf *bp ); + +static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset, + u_int32_t numBlocks, struct jnl_trim_list *list); + +static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list); + +static int hfs_alloc_scan_range(struct hfsmount *hfsmp, + u_int32_t startbit, + u_int32_t *bitToScan, + struct jnl_trim_list *list); + +static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize); +static uint32_t CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next); + +/* Bitmap Re-use Detection */ +static inline int extents_overlap (uint32_t start1, uint32_t len1, + uint32_t start2, uint32_t len2) { + return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) ); +} + + +int hfs_isallocated_scan (struct hfsmount *hfsmp, + u_int32_t startingBlock, + u_int32_t *bp_buf); + +/* Summary Table Functions */ +static int hfs_set_summary (struct hfsmount 
*hfsmp, uint32_t summarybit, uint32_t inuse); +static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index); +static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); +static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc); +static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length); +static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks); +static int hfs_rebuild_summary (struct hfsmount *hfsmp); + +#if 0 +static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); +#endif + +/* Used in external mount code to initialize the summary table */ +int hfs_init_summary (struct hfsmount *hfsmp); + +#if ALLOC_DEBUG +void hfs_validate_summary (struct hfsmount *hfsmp); +#endif + + +/* Functions for manipulating free extent cache */ +static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); +static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); +static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated); + +static void hfs_release_reserved(hfsmount_t *hfsmp, struct rl_entry *range, int list); + +/* Functions for getting free exents */ + +typedef struct bitmap_context { + void *bitmap; // current bitmap chunk + uint32_t run_offset; // offset (in bits) from start of bitmap to start of current run + uint32_t chunk_current; // next bit to scan in the chunk + uint32_t chunk_end; // number of valid bits in this chunk + struct hfsmount *hfsmp; + struct buf *bp; + uint32_t last_free_summary_bit; // last marked free summary bit + int lockflags; + uint64_t lock_start; +} bitmap_context_t; + + +static errno_t get_more_bits(bitmap_context_t *bitmap_ctx); +static int bit_count_set(void *bitmap, int start, int end); +static int bit_count_clr(void *bitmap, int start, int end); +static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count); +static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count); +static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count); +static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set); +static int clzll(uint64_t x); + +#if ALLOC_DEBUG +/* + * Validation Routine to verify that the TRIM list maintained by the journal + * is in good shape relative to what we think the bitmap should have. We should + * never encounter allocated blocks in the TRIM list, so if we ever encounter them, + * we panic. 
+ */ +int trim_validate_bitmap (struct hfsmount *hfsmp); +int trim_validate_bitmap (struct hfsmount *hfsmp) { + u_int64_t blockno_offset; + u_int64_t numblocks; + int i; + int count; + u_int32_t startblk; + u_int32_t blks; + int err = 0; + uint32_t alloccount = 0; + + if (hfsmp->jnl) { + struct journal *jnl = (struct journal*)hfsmp->jnl; + if (jnl->active_tr) { + struct jnl_trim_list *trim = &(jnl->active_tr->trim); + count = trim->extent_count; + for (i = 0; i < count; i++) { + blockno_offset = trim->extents[i].offset; + blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset; + blockno_offset = blockno_offset / hfsmp->blockSize; + numblocks = trim->extents[i].length / hfsmp->blockSize; + + startblk = (u_int32_t)blockno_offset; + blks = (u_int32_t) numblocks; + err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount); + + if (err == 0 && alloccount != 0) { + panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk); + } + } + } + } + return 0; +} + +#endif + + +/* + ;________________________________________________________________________________ + ; + ; Routine: hfs_unmap_free_extent + ; + ; Function: Make note of a range of allocation blocks that should be + ; unmapped (trimmed). That is, the given range of blocks no + ; longer have useful content, and the device can unmap the + ; previous contents. For example, a solid state disk may reuse + ; the underlying storage for other blocks. + ; + ; This routine is only supported for journaled volumes. The extent + ; being freed is passed to the journal code, and the extent will + ; be unmapped after the current transaction is written to disk. + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ; startingBlock - The first allocation block of the extent being freed. + ; numBlocks - The number of allocation blocks of the extent being freed. + ;________________________________________________________________________________ + */ +static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) +{ + u_int64_t offset; + u_int64_t length; + u_int64_t device_sz; + int err = 0; + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); + + if (ALLOC_DEBUG) { + if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { + panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks); + } + } + + if (hfsmp->jnl != NULL) { + device_sz = hfsmp->hfs_logical_bytes; + offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; + length = (u_int64_t) numBlocks * hfsmp->blockSize; + + /* Validate that the trim is in a valid range of bytes */ + if ((offset >= device_sz) || ((offset + length) > device_sz)) { + printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length); + err = EINVAL; + } + + if (err == 0) { + err = journal_trim_add_extent(hfsmp->jnl, offset, length); + if (err) { + printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN); + } + } + } + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0); +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: hfs_track_unmap_blocks + ; + ; Function: Make note of a range of allocation blocks that should be + ; unmapped (trimmed). 
That is, the given range of blocks no + ; longer have useful content, and the device can unmap the + ; previous contents. For example, a solid state disk may reuse + ; the underlying storage for other blocks. + ; + ; This routine is only supported for journaled volumes. + ; + ; *****NOTE*****: + ; This function should *NOT* be used when the volume is fully + ; mounted. This function is intended to support a bitmap iteration + ; at mount time to fully inform the SSD driver of the state of all blocks + ; at mount time, and assumes that there is no allocation/deallocation + ; interference during its iteration., + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ; offset - The first allocation block of the extent being freed. + ; numBlocks - The number of allocation blocks of the extent being freed. + ; list - The list of currently tracked trim ranges. + ;________________________________________________________________________________ + */ +static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, + u_int32_t numBlocks, struct jnl_trim_list *list) { + + u_int64_t offset; + u_int64_t length; + int error = 0; + + if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL) && list->allocated_count && list->extents != NULL) { + int extent_no = list->extent_count; + offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; + length = (u_int64_t) numBlocks * hfsmp->blockSize; + + + list->extents[extent_no].offset = offset; + list->extents[extent_no].length = length; + list->extent_count++; + if (list->extent_count == list->allocated_count) { + error = hfs_issue_unmap (hfsmp, list); + } + } + + return error; +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: hfs_issue_unmap + ; + ; Function: Issue a DKIOCUNMAP for all blocks currently tracked by the jnl_trim_list + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ; list - The list of currently tracked trim ranges. + ;________________________________________________________________________________ + */ + +static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) +{ + dk_unmap_t unmap; + int error = 0; + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); + } + + if (list->extent_count > 0 && list->extents != NULL) { + bzero(&unmap, sizeof(unmap)); + unmap.extents = list->extents; + unmap.extentsCount = list->extent_count; + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_NONE, hfsmp->hfs_raw_dev, unmap.extentsCount, 0, 0, 0); + } + +#if CONFIG_PROTECT + /* + * If we have not yet completed the first scan through the bitmap, then + * optionally inform the block driver below us that this is an initialization + * TRIM scan, if it can deal with this information. 
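+ *
+ * (Editorial note, not part of the original source: each extent in the list
+ * handed to DKIOCUNMAP below is expressed in device-relative bytes, computed
+ * in hfs_track_unmap_blocks() above as
+ *
+ *     offset = (u_int64_t)start * blockSize + hfsPlusIOPosOffset;
+ *     length = (u_int64_t)numBlocks * blockSize;
+ *
+ * Purely as a worked example, with a 4096-byte allocation block and an
+ * hfsPlusIOPosOffset of 1024 (illustrative values), freeing blocks 100..103
+ * queues the extent { .offset = 410624, .length = 16384 }.)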
+ */ + if ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED) == 0) { + unmap.options |= _DK_UNMAP_INITIALIZE; + } +#endif + /* Issue a TRIM and flush them out */ + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + + bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t))); + bzero (&unmap, sizeof(unmap)); + list->extent_count = 0; + } + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN_TRIM | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); + } + + return error; +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: hfs_unmap_alloc_extent + ; + ; Function: Make note of a range of allocation blocks, some of + ; which may have previously been passed to hfs_unmap_free_extent, + ; is now in use on the volume. The given blocks will be removed + ; from any pending DKIOCUNMAP. + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ; startingBlock - The first allocation block of the extent being allocated. + ; numBlocks - The number of allocation blocks being allocated. + ;________________________________________________________________________________ + */ + +static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) +{ + u_int64_t offset; + u_int64_t length; + int err = 0; + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); + + if (hfsmp->jnl != NULL) { + offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; + length = (u_int64_t) numBlocks * hfsmp->blockSize; + + err = journal_trim_remove_extent(hfsmp->jnl, offset, length); + if (err) { + printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN); + } + } + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0); +} + + +/* +;________________________________________________________________________________ +; +; Routine: hfs_trim_callback +; +; Function: This function is called when a transaction that freed extents +; (via hfs_unmap_free_extent/journal_trim_add_extent) has been +; written to the on-disk journal. This routine will add those +; extents to the free extent cache so that they can be reused. +; +; CAUTION: This routine is called while the journal's trim lock +; is held shared, so that no other thread can reuse any portion +; of those extents. We must be very careful about which locks +; we take from within this callback, to avoid deadlock. The +; call to add_free_extent_cache will end up taking the cache's +; lock (just long enough to add these extents to the cache). +; +; CAUTION: If the journal becomes invalid (eg., due to an I/O +; error when trying to write to the journal), this callback +; will stop getting called, even if extents got freed before +; the journal became invalid! +; +; Input Arguments: +; arg - The hfsmount of the volume containing the extents. +; extent_count - The number of extents freed in the transaction. +; extents - An array of extents (byte ranges) that were freed. 
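+;
+; (Editorial note, not part of the original source: this callback is handed
+; to the journal layer when the volume is mounted. Assuming the
+; journal_trim_set_callback() hook declared in hfs_journal.h, the
+; registration made elsewhere in this patch is shaped roughly like
+;
+;     journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
+;
+; The exact call site and signature are stated here as an assumption, for
+; orientation only.)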
+;________________________________________________________________________________
+*/
+
+void
+hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
+{
+ uint32_t i;
+ uint32_t startBlock, numBlocks;
+ struct hfsmount *hfsmp = arg;
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
+
+ for (i=0; i<extent_count; i++) {
+ startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
+ numBlocks = extents[i].length / hfsmp->blockSize;
+ (void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
+ }
+
+ if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+ KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+}
+
+
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine: CheckUnmappedBytes
+ ;
+ ; Function: From the specified inputs, determine if the extent in question overlaps
+ ; space that was recently freed, where the recently freed space may still be
+ ; lingering in an uncommitted journal transaction. This may have data safety
+ ; implications. The intended use is to decide whether or not to force a journal flush
+ ; before allowing file data I/O to be issued. If we did not do this
+ ; then it would be possible to issue the file I/O ahead of the
+ ; journal, resulting in data being overwritten if the transaction either
+ ; is not committed or cannot be replayed.
+ ;
+ ; NOTE: This function assumes that the journal and catalog/extent locks are held.
+ ;
+ ; Input Arguments:
+ ; hfsmp - The volume containing the allocation blocks.
+ ; blockno - start of the extent in question (in allocation blocks)
+ ; numblocks - number of blocks in the extent
+ ; recently_freed - output pointer containing whether or not the blocks were freed recently
+ ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks)
+ ;
+ ; Output:
+ ;
+ ; Returns 0 if we could determine extent validity for this (or a previous transaction)
+ ; Returns errno if there was an error
+ ;
+ ; If 0 is returned, then *recently_freed contains a boolean indicating whether
+ ; the blocks were recently freed.
+ ;________________________________________________________________________________
+ */
+
+u_int32_t
+CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) {
+ uint64_t device_offset;
+ uint64_t numbytes;
+ uint32_t err = 0;
+ uint64_t lba_overlap_end;
+
+ if (hfsmp->jnl != NULL) {
+ /*
+ * Convert the allocation block # and the number of blocks into device-relative
+ * offsets so that they can be compared using the TRIM list.
+ */
+ uint64_t device_sz = hfsmp->hfs_logical_bytes;
+ device_offset = blockno * ((uint64_t)hfsmp->blockSize);
+ device_offset += hfsmp->hfsPlusIOPosOffset;
+ numbytes = (((uint64_t)hfsmp->blockSize) * numblocks);
+
+ /*
+ * Since we check that the device_offset isn't too large, it's safe to subtract it
+ * from the size in the second check.
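+ *
+ * (Editorial sketch, not part of the original source: per the header comment
+ * above, a typical caller is file-I/O setup code deciding whether to force a
+ * journal flush before issuing the I/O. Assuming hfs_flush() with
+ * HFS_FLUSH_JOURNAL is the flush entry point, that check would look roughly
+ * like
+ *
+ *     int freed = 0;
+ *     uint32_t overlap_end;
+ *     if (CheckUnmappedBytes(hfsmp, blkno, nblks, &freed, &overlap_end) == 0 && freed)
+ *         hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
+ *
+ * i.e. the flush is only paid for when the extent overlaps a pending trim.)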
+ */ + if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) { + return EINVAL; + } + + /* Ask the journal if this extent overlaps with any pending TRIMs */ + if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) { + *recently_freed = 1; + + /* Convert lba_overlap_end back into allocation blocks */ + uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset; + end_offset = end_offset / ((uint64_t) hfsmp->blockSize); + *overlap_end = (uint32_t) end_offset; + } + else { + *recently_freed = 0; + } + err = 0; + } + else { + /* There may not be a journal. In that case, always return success. */ + *recently_freed = 0; + } + return err; + +} + + +/* + ;________________________________________________________________________________ + ; + ; Routine: ScanUnmapBlocks + ; + ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying + ; device as needed so that the underlying disk device is as + ; up-to-date as possible with which blocks are unmapped. + ; Additionally build up the summary table as needed. + ; + ; This function reads the bitmap in large block size + ; (up to 1MB) unlike the runtime which reads the bitmap + ; in 4K block size. So if this function is being called + ; after the volume is mounted and actively modified, the + ; caller needs to invalidate all of the existing buffers + ; associated with the bitmap vnode before calling this + ; function. If the buffers are not invalidated, it can + ; cause buf_t collision and potential data corruption. + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ;________________________________________________________________________________ + */ + +u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) +{ + u_int32_t blocks_scanned = 0; + int error = 0; + struct jnl_trim_list trimlist; + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_START, hfsmp->hfs_raw_dev, 0, 0, 0, 0); + } + + /* + *struct jnl_trim_list { + uint32_t allocated_count; + uint32_t extent_count; + dk_extent_t *extents; + }; + */ + bzero (&trimlist, sizeof(trimlist)); + + /* + * Any trim related work should be tied to whether the underlying + * storage media supports UNMAP, as any solid state device would + * on desktop or embedded. + * + * We do this because we may want to scan the full bitmap on + * desktop for spinning media for the purposes of building up the + * summary table. + * + * We also avoid sending TRIMs down to the underlying media if the + * mount is read-only. 
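+ *
+ * (Editorial note, not part of the original source: the trim list allocated
+ * just below is sized to one page of dk_extent_t records. dk_extent_t is a
+ * pair of 64-bit fields (offset, length), so with a 4 KiB page that is
+ * 4096 / 16 = 256 extents batched per DKIOCUNMAP before hfs_issue_unmap()
+ * flushes and resets the list.)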
+ */ + + if ((hfsmp->hfs_flags & HFS_UNMAP) && + ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + /* If the underlying device supports unmap and the mount is read-write, initialize */ + int alloc_count = PAGE_SIZE / sizeof(dk_extent_t); + void *extents = hfs_malloc(alloc_count * sizeof(dk_extent_t)); + trimlist.extents = (dk_extent_t*)extents; + trimlist.allocated_count = alloc_count; + trimlist.extent_count = 0; + } + + while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){ + + error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist); + + if (error) { + printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN); + break; + } + } + + if ((hfsmp->hfs_flags & HFS_UNMAP) && + ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + if (error == 0) { + hfs_issue_unmap(hfsmp, &trimlist); + } + if (trimlist.extents) { + hfs_free(trimlist.extents, trimlist.allocated_count * sizeof(dk_extent_t)); + } + } + + /* + * This is in an #if block because hfs_validate_summary prototype and function body + * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly. + */ +#if ALLOC_DEBUG + sanity_check_free_ext(hfsmp, 1); + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Validate the summary table too! */ + hfs_validate_summary(hfsmp); + printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN); + } +#endif + + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_SCAN | DBG_FUNC_END, error, hfsmp->hfs_raw_dev, 0, 0, 0); + } + + return error; +} + +static void add_to_reserved_list(hfsmount_t *hfsmp, uint32_t start, + uint32_t count, int list, + struct rl_entry **reservation) +{ + struct rl_entry *range, *next_range; + + if (list == HFS_TENTATIVE_BLOCKS) { + int nranges = 0; + // Don't allow more than 4 tentative reservations + TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], + rl_link, next_range) { + if (++nranges > 3) + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + } + } + + range = hfs_malloc(sizeof(*range)); + range->rl_start = start; + range->rl_end = start + count - 1; + TAILQ_INSERT_HEAD(&hfsmp->hfs_reserved_ranges[list], range, rl_link); + *reservation = range; +} + +static void hfs_release_reserved(hfsmount_t *hfsmp, + struct rl_entry *range, + int list) +{ + if (range->rl_start == -1) + return; + + TAILQ_REMOVE(&hfsmp->hfs_reserved_ranges[list], range, rl_link); + + if (rl_len(range) > 0) { + if (list == HFS_TENTATIVE_BLOCKS) + hfsmp->tentativeBlocks -= rl_len(range); + else { + /* + * We don't need to unmap tentative blocks because we won't have + * written to them, but we might have written to reserved blocks. + * Nothing can refer to those blocks so this doesn't have to be + * via the journal. If this proves to be too expensive, we could + * consider not sending down the unmap or we could require this + * to always be called within a transaction and then we can use + * the journal. 
+ */ + dk_extent_t extent = { + .offset = (hfs_blk_to_bytes(range->rl_start, hfsmp->blockSize) + + hfsmp->hfsPlusIOPosOffset), + .length = hfs_blk_to_bytes(rl_len(range), hfsmp->blockSize) + }; + dk_unmap_t unmap = { + .extents = &extent, + .extentsCount = 1, + }; + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, + 0, vfs_context_kernel()); + hfs_assert(hfsmp->lockedBlocks >= rl_len(range)); + hfsmp->lockedBlocks -= rl_len(range); + } + hfs_release_summary(hfsmp, range->rl_start, rl_len(range)); + add_free_extent_cache(hfsmp, range->rl_start, rl_len(range)); + } + + range->rl_start = -1; + range->rl_end = -2; +} + +static void hfs_free_locked_internal(hfsmount_t *hfsmp, + struct rl_entry **reservation, + int list) +{ + if (*reservation) { + hfs_release_reserved(hfsmp, *reservation, list); + hfs_free(*reservation, sizeof(**reservation)); + *reservation = NULL; + } +} + +void hfs_free_tentative(hfsmount_t *hfsmp, struct rl_entry **reservation) +{ + hfs_free_locked_internal(hfsmp, reservation, HFS_TENTATIVE_BLOCKS); +} + +void hfs_free_locked(hfsmount_t *hfsmp, struct rl_entry **reservation) +{ + hfs_free_locked_internal(hfsmp, reservation, HFS_LOCKED_BLOCKS); +} + +OSErr BlockAllocate ( + hfsmount_t *hfsmp, /* which volume to allocate space on */ + u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ + u_int32_t minBlocks, /* desired number of blocks to allocate */ + u_int32_t maxBlocks, /* maximum number of blocks to allocate */ + hfs_block_alloc_flags_t flags, /* option flags */ + u_int32_t *actualStartBlock, /* actual first block of allocation */ + u_int32_t *actualNumBlocks) +{ + hfs_alloc_extra_args_t extra_args = { + .max_blocks = maxBlocks + }; + + HFSPlusExtentDescriptor extent = { startingBlock, minBlocks }; + + OSErr err = hfs_block_alloc_int(hfsmp, &extent, flags, &extra_args); + + *actualStartBlock = extent.startBlock; + *actualNumBlocks = extent.blockCount; + + return err; +} + +errno_t hfs_block_alloc(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap) +{ + return MacToVFSError(hfs_block_alloc_int(hfsmp, extent, flags, ap)); +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: hfs_block_alloc_int + ; + ; Function: Allocate space on a volume. If contiguous allocation is requested, + ; at least the requested number of bytes will be allocated or an + ; error will be returned. If contiguous allocation is not forced, + ; the space will be allocated with the first largest extent available + ; at the requested starting allocation block. If there is not enough + ; room there, a block allocation of less than the requested size will be + ; allocated. + ; + ; If the requested starting block is 0 (for new file allocations), + ; the volume's allocation block pointer will be used as a starting + ; point. + ; + ; Input Arguments: + ; hfsmp - Pointer to the HFS mount structure. + ; extent - startBlock indicates the block to start + ; searching from and blockCount is the number of + ; blocks required. Depending on the flags used, + ; more or less blocks may be returned. The + ; allocated extent is returned via this + ; parameter. + ; flags - Flags to specify options like contiguous, use + ; metadata zone, skip free block check, etc. + ; ap - Additional arguments used depending on flags. + ; See hfs_alloc_extra_args_t and below. 
+ ; + ; Output: + ; (result) - Error code, zero for successful allocation + ; extent - If successful, the allocated extent. + ; + ; Side effects: + ; The volume bitmap is read and updated; the volume bitmap cache may be changed. + ; + ; HFS_ALLOC_TENTATIVE + ; Blocks will be reserved but not marked allocated. They can be + ; stolen if free space is limited. Tentative blocks can be used by + ; passing HFS_ALLOC_USE_TENTATIVE and passing in the resevation. + ; @ap->reservation_out is used to store the reservation. + ; + ; HFS_ALLOC_USE_TENTATIVE + ; Use blocks previously returned with HFS_ALLOC_TENTATIVE. + ; @ap->reservation_in should be set to whatever @ap->reservation_out + ; was set to when HFS_ALLOC_TENTATIVE was used. If the tentative + ; reservation was stolen, a normal allocation will take place. + ; + ; HFS_ALLOC_LOCKED + ; Blocks will be reserved but not marked allocated. Unlike tentative + ; reservations they cannot be stolen. It is safe to write to these + ; blocks. @ap->reservation_out is used to store the reservation. + ; + ; HFS_ALLOC_COMMIT + ; This will take blocks previously returned with HFS_ALLOC_LOCKED and + ; mark them allocated on disk. @ap->reservation_in is used. + ; + ; HFS_ALLOC_ROLL_BACK + ; Take blocks that were just recently deallocated and mark them + ; allocated. This is for roll back situations. Blocks got + ; deallocated and then something went wrong and we need to roll back + ; by marking the blocks allocated. + ; + ; HFS_ALLOC_FORCECONTIG + ; It will not return fewer than @min_blocks. + ; + ; HFS_ALLOC_TRY_HARD + ; We will perform an exhaustive search to try and find @max_blocks. + ; It will not return fewer than @min_blocks. + ; + ;________________________________________________________________________________ + */ +OSErr hfs_block_alloc_int(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + hfs_block_alloc_flags_t flags, + hfs_alloc_extra_args_t *ap) +{ + u_int32_t freeBlocks; + OSErr err = 0; + Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated + Boolean forceContiguous = false; + Boolean forceFlush; + + uint32_t startingBlock = extent->startBlock; + uint32_t minBlocks = extent->blockCount; + uint32_t maxBlocks = (ap && ap->max_blocks) ? ap->max_blocks : minBlocks; + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0); + + if (ISSET(flags, HFS_ALLOC_COMMIT)) { + extent->startBlock = (*ap->reservation_in)->rl_start; + extent->blockCount = rl_len(*ap->reservation_in); + goto mark_allocated; + } + + if (ISSET(flags, HFS_ALLOC_ROLL_BACK)) + goto mark_allocated; + + freeBlocks = hfs_freeblks(hfsmp, 0); + + if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) { + struct rl_entry *range = *ap->reservation_in; + + if (range && range->rl_start != -1) { + /* + * It's possible that we have a tentative reservation + * but there aren't enough free blocks due to loaned blocks + * or insufficient space in the backing store. + */ + uint32_t count = min(min(maxBlocks, rl_len(range)), freeBlocks); + + if (count >= minBlocks) { + extent->startBlock = range->rl_start; + extent->blockCount = count; + + // Should we go straight to commit? + if (!ISSET(flags, HFS_ALLOC_LOCKED)) + SET(flags, HFS_ALLOC_COMMIT); + + goto mark_allocated; + } + } + + /* + * We can't use the tentative reservation so free it and allocate + * normally. 
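+ *
+ * (Editorial sketch, not part of the original source: the intended life
+ * cycle of a tentative reservation, per the flag descriptions in the header
+ * comment above, is roughly
+ *
+ *     struct rl_entry *res = NULL;
+ *     hfs_alloc_extra_args_t args = { .max_blocks = want,
+ *                                     .reservation_out = &res };
+ *     HFSPlusExtentDescriptor ext = { 0, min_want };
+ *     hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_TENTATIVE, &args);     // reserve only
+ *     ...
+ *     args.reservation_in = &res;
+ *     hfs_block_alloc(hfsmp, &ext, HFS_ALLOC_USE_TENTATIVE, &args); // consume it
+ *
+ * where `want' and `min_want' are illustrative placeholders; only the flag,
+ * type and function names appear in this file.)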
+ */ + hfs_free_tentative(hfsmp, ap->reservation_in); + CLR(flags, HFS_ALLOC_USE_TENTATIVE); + } + + if (ISSET(flags, HFS_ALLOC_FORCECONTIG | HFS_ALLOC_TRY_HARD)) + forceContiguous = true; + + if (flags & HFS_ALLOC_FLUSHTXN) { + forceFlush = true; + } + else { + forceFlush = false; + } + + hfs_assert(hfsmp->freeBlocks >= hfsmp->tentativeBlocks); + + // See if we have to steal tentative blocks + if (freeBlocks < hfsmp->tentativeBlocks + minBlocks) + SET(flags, HFS_ALLOC_IGNORE_TENTATIVE); + + /* Skip free block check if blocks are being allocated for relocating + * data during truncating a volume. + * + * During hfs_truncatefs(), the volume free block count is updated + * before relocating data to reflect the total number of free blocks + * that will exist on the volume after resize is successful. This + * means that we have reserved allocation blocks required for relocating + * the data and hence there is no need to check the free blocks. + * It will also prevent resize failure when the number of blocks in + * an extent being relocated is more than the free blocks that will + * exist after the volume is resized. + */ + if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + // If the disk is already full, don't bother. + if (freeBlocks == 0) { + err = dskFulErr; + goto exit; + } + if (forceContiguous && freeBlocks < minBlocks) { + err = dskFulErr; + goto exit; + } + + /* + * Clip if necessary so we don't over-subscribe the free blocks. + */ + if (minBlocks > freeBlocks) { + minBlocks = freeBlocks; + } + if (maxBlocks > freeBlocks) { + maxBlocks = freeBlocks; + } + } + + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + err = hfs_alloc_try_hard(hfsmp, extent, maxBlocks, flags); + if (err) + goto exit; + + goto mark_allocated; + } + + // + // If caller didn't specify a starting block number, then use the volume's + // next block to allocate from. + // + if (startingBlock == 0) { + hfs_lock_mount (hfsmp); + + /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */ + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + startingBlock = hfsmp->sparseAllocation; + } + else { + startingBlock = hfsmp->nextAllocation; + } + hfs_unlock_mount(hfsmp); + updateAllocPtr = true; + } + + + if (startingBlock >= hfsmp->allocLimit) { + startingBlock = 0; /* overflow so start at beginning */ + } + + // + // If the request must be contiguous, then find a sequence of free blocks + // that is long enough. Otherwise, find the first free block. + // + if (forceContiguous) { + err = BlockFindContig(hfsmp, startingBlock, minBlocks, maxBlocks, + flags, &extent->startBlock, &extent->blockCount); + /* + * If we allocated from a new position then also update the roving allocator. + * This will keep the roving allocation pointer up-to-date even + * if we are using the new R/B tree allocator, since + * it doesn't matter to us here, how the underlying allocator found + * the block to vend out. + */ + if ((err == noErr) && + (extent->startBlock > startingBlock) && + ((extent->startBlock < hfsmp->hfs_metazone_start) || + (extent->startBlock > hfsmp->hfs_metazone_end))) { + updateAllocPtr = true; + } + } else { + /* + * Scan the bitmap once, gather the N largest free extents, then + * allocate from these largest extents. Repeat as needed until + * we get all the space we needed. We could probably build up + * that list when the higher level caller tried (and failed) a + * contiguous allocation first. 
+ * + * Note that the free-extent cache will be cease to be updated if + * we are using the red-black tree for allocations. If we jettison + * the tree, then we will reset the free-extent cache and start over. + */ + + /* Disable HFS_ALLOC_FLUSHTXN if needed */ + if (forceFlush) { + flags &= ~HFS_ALLOC_FLUSHTXN; + } + + /* + * BlockFindKnown only examines the free extent cache; anything in there will + * have been committed to stable storage already. + */ + err = BlockFindKnown(hfsmp, maxBlocks, &extent->startBlock, + &extent->blockCount); + + /* dskFulErr out of BlockFindKnown indicates an empty Free Extent Cache */ + + if (err == dskFulErr) { + /* + * Now we have to do a bigger scan. Start at startingBlock and go up until the + * allocation limit. We 'trust' the summary bitmap in this call, if it tells us + * that it could not find any free space. + */ + err = BlockFindAny(hfsmp, startingBlock, hfsmp->allocLimit, + maxBlocks, flags, true, + &extent->startBlock, &extent->blockCount); + } + if (err == dskFulErr) { + /* + * Vary the behavior here if the summary table is on or off. + * If it is on, then we don't trust it it if we get into this case and + * basically do a full scan for maximum coverage. + * If it is off, then we trust the above and go up until the startingBlock. + */ + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks, + flags, false, + &extent->startBlock, &extent->blockCount); + } + else { + err = BlockFindAny(hfsmp, 1, startingBlock, maxBlocks, + flags, false, + &extent->startBlock, &extent->blockCount); + } + + /* + * Last Resort: Find/use blocks that may require a journal flush. + */ + if (err == dskFulErr && forceFlush) { + flags |= HFS_ALLOC_FLUSHTXN; + err = BlockFindAny(hfsmp, 1, hfsmp->allocLimit, maxBlocks, + flags, false, + &extent->startBlock, &extent->blockCount); + } + } + } + + if (err) + goto exit; + +mark_allocated: + + // Handle alignment + if (ap && ap->alignment && extent->blockCount < ap->max_blocks) { + /* + * See the comment in FileMgrInternal.h for alignment + * semantics. + */ + uint32_t rounding = ((extent->blockCount + ap->alignment_offset) + % ap->alignment); + + // @minBlocks is still the minimum + if (extent->blockCount >= minBlocks + rounding) + extent->blockCount -= rounding; + } + + err = BlockMarkAllocatedInternal(hfsmp, extent->startBlock, + extent->blockCount, flags); + + if (err) + goto exit; + + if (ISSET(hfsmp->hfs_flags, HFS_CS) && extent->blockCount != 0 + && !ISSET(flags, HFS_ALLOC_TENTATIVE)) { + if (ISSET(flags, HFS_ALLOC_FAST_DEV)) { +#if !HFS_ALLOC_TEST /* need this guard because this file is compiled outside of the kernel */ + hfs_pin_block_range(hfsmp, HFS_PIN_IT, + extent->startBlock, extent->blockCount); +#endif + } else { + _dk_cs_map_t cm = { + .cm_extent = { + (hfs_blk_to_bytes(extent->startBlock, hfsmp->blockSize) + + hfsmp->hfsPlusIOPosOffset), + hfs_blk_to_bytes(extent->blockCount, hfsmp->blockSize) + } + }; + + errno_t err2 = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, + (caddr_t)&cm, 0, vfs_context_current()); + + /* + * Ignore errors for now; we are fully provisioned so in + * theory CoreStorage should be able to handle this + * allocation. Should we want to change this in future, then + * we should think carefully how we handle errors. Allowing + * CoreStorage to truncate our allocation is problematic + * because we might have minimum and alignment requirements + * and backing out changes we have already made is + * non-trivial. 
+ */ + + if (err2 || cm.cm_bytes_mapped < cm.cm_extent.length) { + printf("hfs: _DKIOCCSMAP error: %d, bytes_mapped: %llu\n", + err2, cm.cm_bytes_mapped); + } + } + } + + // if we actually allocated something then go update the + // various bits of state that we maintain regardless of + // whether there was an error (i.e. partial allocations + // still need to update things like the free block count). + // + if (extent->blockCount != 0) { + // + // If we used the volume's roving allocation pointer, then we need to update it. + // Adding in the length of the current allocation might reduce the next allocate + // call by avoiding a re-scan of the already allocated space. However, the clump + // just allocated can quite conceivably end up being truncated or released when + // the file is closed or its EOF changed. Leaving the allocation pointer at the + // start of the last allocation will avoid unnecessary fragmentation in this case. + // + hfs_lock_mount (hfsmp); + + if (!ISSET(flags, HFS_ALLOC_USE_TENTATIVE | HFS_ALLOC_COMMIT)) { + lck_spin_lock(&hfsmp->vcbFreeExtLock); + if (hfsmp->vcbFreeExtCnt == 0 && hfsmp->hfs_freed_block_count == 0) { + hfsmp->sparseAllocation = extent->startBlock; + } + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + if (extent->blockCount < hfsmp->hfs_freed_block_count) { + hfsmp->hfs_freed_block_count -= extent->blockCount; + } else { + hfsmp->hfs_freed_block_count = 0; + } + + if (updateAllocPtr && + ((extent->startBlock < hfsmp->hfs_metazone_start) || + (extent->startBlock > hfsmp->hfs_metazone_end))) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, extent->startBlock); + } + + (void) remove_free_extent_cache(hfsmp, extent->startBlock, extent->blockCount); + } + + if (ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) { + (*ap->reservation_in)->rl_start += extent->blockCount; + hfsmp->tentativeBlocks -= extent->blockCount; + if (rl_len(*ap->reservation_in) <= 0) + hfs_free_tentative(hfsmp, ap->reservation_in); + } else if (ISSET(flags, HFS_ALLOC_COMMIT)) { + // Handle committing locked extents + hfs_assert(hfsmp->lockedBlocks >= extent->blockCount); + (*ap->reservation_in)->rl_start += extent->blockCount; + hfsmp->lockedBlocks -= extent->blockCount; + hfs_free_locked(hfsmp, ap->reservation_in); + } + + /* + * Update the number of free blocks on the volume + * + * Skip updating the free blocks count if the block are + * being allocated to relocate data as part of hfs_truncatefs() + */ + + if (ISSET(flags, HFS_ALLOC_TENTATIVE)) { + hfsmp->tentativeBlocks += extent->blockCount; + } else if (ISSET(flags, HFS_ALLOC_LOCKED)) { + hfsmp->lockedBlocks += extent->blockCount; + } else if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + hfsmp->freeBlocks -= extent->blockCount; + } + MarkVCBDirty(hfsmp); + hfs_unlock_mount(hfsmp); + + hfs_generate_volume_notifications(hfsmp); + + if (ISSET(flags, HFS_ALLOC_TENTATIVE)) { + hfs_assert(ap); + add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, + 0, ap->reservation_out); + } else if (ISSET(flags, HFS_ALLOC_LOCKED)) { + hfs_assert(ap); + add_to_reserved_list(hfsmp, extent->startBlock, extent->blockCount, + 1, ap->reservation_out); + } + + if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) { + /* + * See if we used tentative blocks. Note that we cannot + * free the reservations here because we don't have access + * to the external pointers. All we can do is update the + * reservations and they'll be cleaned up when whatever is + * holding the pointers calls us back. 
+ * + * We use the rangelist code to detect overlaps and + * constrain the tentative block allocation. Note that + * @end is inclusive so that our rangelist code will + * resolve the various cases for us. As a result, we need + * to ensure that we account for it properly when removing + * the blocks from the tentative count in the mount point + * and re-inserting the remainder (either head or tail) + */ + struct rl_entry *range, *next_range; + struct rl_head *ranges = &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS]; + const uint32_t start = extent->startBlock; + const uint32_t end = start + extent->blockCount - 1; + TAILQ_FOREACH_SAFE(range, ranges, rl_link, next_range) { + switch (rl_overlap(range, start, end)) { + case RL_OVERLAPCONTAINSRANGE: + // Keep the bigger part + if (start - range->rl_start > range->rl_end - end) { + // Discard the tail + hfsmp->tentativeBlocks -= range->rl_end + 1 - start; + hfs_release_summary(hfsmp, end + 1, range->rl_end - end); + const uint32_t old_end = range->rl_end; + range->rl_end = start - 1; + add_free_extent_cache(hfsmp, end + 1, old_end - end); + } else { + // Discard the head + hfsmp->tentativeBlocks -= end + 1 - range->rl_start; + hfs_release_summary(hfsmp, range->rl_start, + start - range->rl_start); + const uint32_t old_start = range->rl_start; + range->rl_start = end + 1; + add_free_extent_cache(hfsmp, old_start, + start - old_start); + } + hfs_assert(range->rl_end >= range->rl_start); + break; + case RL_MATCHINGOVERLAP: + case RL_OVERLAPISCONTAINED: + hfsmp->tentativeBlocks -= rl_len(range); + range->rl_end = range->rl_start - 1; + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + break; + case RL_OVERLAPSTARTSBEFORE: + hfsmp->tentativeBlocks -= range->rl_end + 1 - start; + range->rl_end = start - 1; + hfs_assert(range->rl_end >= range->rl_start); + break; + case RL_OVERLAPENDSAFTER: + hfsmp->tentativeBlocks -= end + 1 - range->rl_start; + range->rl_start = end + 1; + hfs_assert(range->rl_end >= range->rl_start); + break; + case RL_NOOVERLAP: + break; + } + } + } + } + +exit: + + if (ALLOC_DEBUG) { + if (err == noErr) { + if (extent->startBlock >= hfsmp->totalBlocks) { + panic ("BlockAllocate: vending invalid blocks!"); + } + if (extent->startBlock >= hfsmp->allocLimit) { + panic ("BlockAllocate: vending block past allocLimit!"); + } + + if ((extent->startBlock + extent->blockCount) >= hfsmp->totalBlocks) { + panic ("BlockAllocate: vending too many invalid blocks!"); + } + + if ((extent->startBlock + extent->blockCount) >= hfsmp->allocLimit) { + panic ("BlockAllocate: vending too many invalid blocks past allocLimit!"); + } + } + } + + if (err) { + // Just to be safe... + extent->startBlock = 0; + extent->blockCount = 0; + } + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, extent->startBlock, extent->blockCount, 0, 0); + + return err; +} + + +/* +;________________________________________________________________________________ +; +; Routine: BlockDeallocate +; +; Function: Update the bitmap to deallocate a run of disk allocation blocks +; +; Input Arguments: +; vcb - Pointer to ExtendedVCB for the volume to free space on +; firstBlock - First allocation block to be freed +; numBlocks - Number of allocation blocks to free up (must be > 0!) +; +; Output: +; (result) - Result code +; +; Side effects: +; The volume bitmap is read and updated; the volume bitmap cache may be changed. +; The Allocator's red-black trees may also be modified as a result. 
+; +;________________________________________________________________________________ +*/ + +OSErr BlockDeallocate ( + ExtendedVCB *vcb, // Which volume to deallocate space on + u_int32_t firstBlock, // First block in range to deallocate + u_int32_t numBlocks, // Number of contiguous blocks to deallocate + hfs_block_alloc_flags_t flags) +{ + if (ISSET(flags, HFS_ALLOC_TENTATIVE | HFS_ALLOC_LOCKED)) + return 0; + + OSErr err; + struct hfsmount *hfsmp; + hfsmp = VCBTOHFS(vcb); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0); + + // + // If no blocks to deallocate, then exit early + // + if (numBlocks == 0) { + err = noErr; + goto Exit; + } + + + if (ALLOC_DEBUG) { + if (firstBlock >= hfsmp->totalBlocks) { + panic ("BlockDeallocate: freeing invalid blocks!"); + } + + if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) { + panic ("BlockDeallocate: freeing too many invalid blocks!"); + } + } + + /* + * If we're using the summary bitmap, then try to mark the bits + * as potentially usable/free before actually deallocating them. + * It is better to be slightly speculative here for correctness. + */ + + (void) hfs_release_summary (hfsmp, firstBlock, numBlocks); + + err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true); + + if (err) { + goto Exit; + } + + // + // Update the volume's free block count, and mark the VCB as dirty. + // + hfs_lock_mount(hfsmp); + /* + * Do not update the free block count. This flags is specified + * when a volume is being truncated. + */ + if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + vcb->freeBlocks += numBlocks; + } + + vcb->hfs_freed_block_count += numBlocks; + + if (vcb->nextAllocation == (firstBlock + numBlocks)) { + HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks)); + } + + if (hfsmp->jnl == NULL) { + /* + * In the journal case, we'll add the free extent once the journal + * calls us back to tell us it wrote the transaction to disk. + */ + (void) add_free_extent_cache(vcb, firstBlock, numBlocks); + + /* + * If the journal case, we'll only update sparseAllocation once the + * free extent cache becomes empty (when we remove the last entry + * from the cache). Skipping it here means we're less likely to + * find a recently freed extent via the bitmap before it gets added + * to the free extent cache. + */ + if (firstBlock < vcb->sparseAllocation) { + vcb->sparseAllocation = firstBlock; + } + } + + MarkVCBDirty(vcb); + hfs_unlock_mount(hfsmp); + + hfs_generate_volume_notifications(VCBTOHFS(vcb)); +Exit: + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0); + + return err; +} + + +u_int8_t freebitcount[16] = { + 4, 3, 3, 2, 3, 2, 2, 1, /* 0 1 2 3 4 5 6 7 */ + 3, 2, 2, 1, 2, 1, 1, 0, /* 8 9 A B C D E F */ +}; + +u_int32_t +MetaZoneFreeBlocks(ExtendedVCB *vcb) +{ + u_int32_t freeblocks; + u_int32_t *currCache; + uintptr_t blockRef; + u_int32_t bit; + u_int32_t lastbit; + int bytesleft; + int bytesperblock; + u_int8_t byte; + u_int8_t *buffer; + + blockRef = 0; + bytesleft = freeblocks = 0; + buffer = NULL; + bit = VCBTOHFS(vcb)->hfs_metazone_start; + if (bit == 1) + bit = 0; + + lastbit = VCBTOHFS(vcb)->hfs_metazone_end; + bytesperblock = vcb->vcbVBMIOSize; + + /* + * Count all the bits from bit to lastbit. + */ + while (bit < lastbit) { + /* + * Get next bitmap block. 
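+ *
+ * (Editorial note, not part of the original source: the freebitcount[] table
+ * above maps one bitmap nibble to its number of zero, i.e. free, bits. For
+ * example a bitmap byte of 0xA5: the low nibble 0x5 (0101) has two clear
+ * bits (freebitcount[0x5] == 2) and the high nibble 0xA (1010) has two as
+ * well (freebitcount[0xA] == 2), so that byte contributes 4 free blocks to
+ * the count below.)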
+ */ + if (bytesleft == 0) { + if (blockRef) { + (void) ReleaseBitmapBlock(vcb, blockRef, false); + blockRef = 0; + } + if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE) != 0) { + return (0); + } + buffer = (u_int8_t *)currCache; + bytesleft = bytesperblock; + } + byte = *buffer++; + freeblocks += freebitcount[byte & 0x0F]; + freeblocks += freebitcount[(byte >> 4) & 0x0F]; + bit += kBitsPerByte; + --bytesleft; + } + if (blockRef) + (void) ReleaseBitmapBlock(vcb, blockRef, false); + + return (freeblocks); +} + + +/* + * Obtain the next allocation block (bit) that's + * outside the metadata allocation zone. + */ +static u_int32_t NextBitmapBlock( + ExtendedVCB *vcb, + u_int32_t bit) +{ + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) + return (bit); + /* + * Skip over metadata allocation zone. + */ + if ((bit >= hfsmp->hfs_metazone_start) && + (bit <= hfsmp->hfs_metazone_end)) { + bit = hfsmp->hfs_metazone_end + 1; + } + return (bit); +} + + +// Assumes @bitmap is aligned to 8 bytes and multiple of 8 bytes. +static void bits_set(void *bitmap, int start, int end) +{ + const int start_bit = start & 63; + const int end_bit = end & 63; + +#define LEFT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull << (64 - bit)) +#define RIGHT_MASK(bit) OSSwapHostToBigInt64(0xffffffffffffffffull >> bit) + + uint64_t *p = (uint64_t *)bitmap + start / 64; + + if ((start & ~63) == (end & ~63)) { + // Start and end in same 64 bits + *p |= RIGHT_MASK(start_bit) & LEFT_MASK(end_bit); + } else { + *p++ |= RIGHT_MASK(start_bit); + + int nquads = (end - end_bit - start - 1) / 64; + + while (nquads--) + *p++ = 0xffffffffffffffffull; + + if (end_bit) + *p |= LEFT_MASK(end_bit); + } +} + +// Modifies the buffer and applies any reservations that we might have +static buf_t process_reservations(hfsmount_t *hfsmp, buf_t bp, off_t offset, + hfs_block_alloc_flags_t flags, + bool always_copy) +{ + bool taken_copy = false; + void *buffer = (void *)buf_dataptr(bp); + const uint32_t nbytes = buf_count(bp); + const off_t end = offset + nbytes * 8 - 1; + + for (int i = (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE) + ? HFS_LOCKED_BLOCKS : HFS_TENTATIVE_BLOCKS); i < 2; ++i) { + struct rl_entry *entry; + TAILQ_FOREACH(entry, &hfsmp->hfs_reserved_ranges[i], rl_link) { + uint32_t a, b; + + enum rl_overlaptype overlap_type = rl_overlap(entry, offset, end); + + if (overlap_type == RL_NOOVERLAP) + continue; + + /* + * If always_copy is false, we only take a copy if B_LOCKED is + * set because ReleaseScanBitmapRange doesn't invalidate the + * buffer in that case. 
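+ *
+ * (Editorial note, not part of the original source: bits_set() above works
+ * on the half-open range [start, end) in the big-endian bit order of the
+ * on-disk bitmap, where bit 0 is the most significant bit of the first byte.
+ * For example, bits_set(buffer, 3, 11) ORs in RIGHT_MASK(3) & LEFT_MASK(11)
+ * and so sets bits 3..10 of the first 64-bit word; that is why the caller
+ * below passes b + 1 for an inclusive b.)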
+ */ + if (!taken_copy && (always_copy || ISSET(buf_flags(bp), B_LOCKED))) { + buf_t new_bp = buf_create_shadow(bp, true, 0, NULL, NULL); + buf_brelse(bp); + bp = new_bp; + buf_setflags(bp, B_NOCACHE); + buffer = (void *)buf_dataptr(bp); + taken_copy = true; + } + + switch (overlap_type) { + case RL_OVERLAPCONTAINSRANGE: + case RL_MATCHINGOVERLAP: + memset(buffer, 0xff, nbytes); + return bp; + case RL_OVERLAPISCONTAINED: + a = entry->rl_start; + b = entry->rl_end; + break; + case RL_OVERLAPSTARTSBEFORE: + a = offset; + b = entry->rl_end; + break; + case RL_OVERLAPENDSAFTER: + a = entry->rl_start; + b = end; + break; + case RL_NOOVERLAP: + __builtin_unreachable(); + } + + a -= offset; + b -= offset; + + hfs_assert(a < buf_count(bp) * 8); + hfs_assert(b < buf_count(bp) * 8); + hfs_assert(b >= a); + + // b is inclusive + bits_set(buffer, a, b + 1); + } + } // for (;;) + + return bp; +} + +/* +;_______________________________________________________________________ +; +; Routine: ReadBitmapBlock +; +; Function: Read in a bitmap block corresponding to a given allocation +; block (bit). Return a pointer to the bitmap block. +; +; Inputs: +; vcb -- Pointer to ExtendedVCB +; bit -- Allocation block whose bitmap block is desired +; +; Outputs: +; buffer -- Pointer to bitmap block corresonding to "block" +; blockRef +;_______________________________________________________________________ +*/ +static OSErr ReadBitmapBlock(ExtendedVCB *vcb, + u_int32_t bit, + u_int32_t **buffer, + uintptr_t *blockRef, + hfs_block_alloc_flags_t flags) +{ + OSErr err; + struct buf *bp = NULL; + struct vnode *vp = NULL; + daddr64_t block; + u_int32_t blockSize; + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0); + + /* + * volume bitmap blocks are protected by the allocation file lock + */ + REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); + + blockSize = (u_int32_t)vcb->vcbVBMIOSize; + block = (daddr64_t)(bit / (blockSize * kBitsPerByte)); + + /* HFS+ / HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + vp = vcb->hfs_allocation_vp; /* use allocation file vnode */ + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ + vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */ + block += vcb->vcbVBMSt; /* map to physical block */ + } +#endif + + err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp); + + if (bp) { + if (err) { + buf_brelse(bp); + *blockRef = 0; + *buffer = NULL; + } else { + if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) { + bp = process_reservations(vcb, bp, block * blockSize * 8, + flags, /* always_copy: */ true); + } + + buf_setfsprivate(bp, (void *)(uintptr_t)flags); + + *blockRef = (uintptr_t)bp; + *buffer = (u_int32_t *)buf_dataptr(bp); + } + } else + hfs_assert(err); + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0); + + return err; +} + + +/* +;_______________________________________________________________________ +; +; Routine: ReadBitmapRange +; +; Function: Read in a range of the bitmap starting at the given offset. +; Use the supplied size to determine the amount of I/O to generate +; against the bitmap file. Return a pointer to the bitmap block. +; +; Inputs: +; hfsmp -- Pointer to hfs mount +; offset -- byte offset into the bitmap file +; size -- How much I/O to generate against the bitmap file. 
+; +; Outputs: +; buffer -- Pointer to bitmap block data corresonding to "block" +; blockRef -- struct 'buf' pointer which MUST be released in a subsequent call. +;_______________________________________________________________________ +*/ +static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset, + uint32_t iosize, uint32_t **buffer, struct buf **blockRef) +{ + + OSErr err; + struct buf *bp = NULL; + struct vnode *vp = NULL; + daddr64_t block; + + /* This function isn't supported for HFS standard */ + if (hfsmp->vcbSigWord != kHFSPlusSigWord) { + return EINVAL; + } + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0); + } + + /* + * volume bitmap blocks are protected by the allocation file lock + */ + REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); + + vp = hfsmp->hfs_allocation_vp; /* use allocation file vnode */ + + /* + * The byte offset argument must be converted into bitmap-relative logical + * block numbers before using it in buf_meta_bread. + * + * buf_meta_bread (and the things it calls) will eventually try to + * reconstruct the byte offset into the file by multiplying the logical + * block number passed in below by the vcbVBMIOSize field in the mount + * point. So we prepare for that by converting the byte offset back into + * logical blocks in terms of VBMIOSize units. + * + * The amount of I/O requested and the byte offset should be computed + * based on the helper function in the frame that called us, so we can + * get away with just doing a simple divide here. + */ + block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize); + + err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp); + + if (bp) { + if (err) { + buf_brelse(bp); + *blockRef = 0; + *buffer = NULL; + } else { + bp = process_reservations(hfsmp, bp, (offset * 8), 0, + /* always_copy: */ false); + + *blockRef = bp; + *buffer = (u_int32_t *)buf_dataptr(bp); + } + } + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0); + } + + return err; +} + + +/* +;_______________________________________________________________________ +; +; Routine: ReleaseBitmapBlock +; +; Function: Relase a bitmap block. +; +; Inputs: +; vcb +; blockRef +; dirty +;_______________________________________________________________________ +*/ +static OSErr ReleaseBitmapBlock( + ExtendedVCB *vcb, + uintptr_t blockRef, + Boolean dirty) +{ + struct buf *bp = (struct buf *)blockRef; + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0); + + if (blockRef == 0) { + if (dirty) + panic("hfs: ReleaseBitmapBlock: missing bp"); + return (0); + } + + if (bp) { + if (dirty) { + hfs_block_alloc_flags_t flags = (uintptr_t)buf_fsprivate(bp); + + if (!ISSET(flags, HFS_ALLOC_IGNORE_RESERVED)) + panic("Modified read-only bitmap buffer!"); + + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else { + buf_bdwrite(bp); + } + } else { + buf_brelse(bp); + } + } + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0); + + return (0); +} + +/* + * ReleaseScanBitmapRange + * + * This is used to release struct bufs that were created for use by + * bitmap scanning code. 
Because they may be of sizes different than the + * typical runtime manipulation code, we want to force them to be purged out + * of the buffer cache ASAP, so we'll release them differently than in the + * ReleaseBitmapBlock case. + * + * Additionally, because we know that we're only reading the blocks and that they + * should have been clean prior to reading them, we will never + * issue a write to them (thus dirtying them). + */ + +static OSErr ReleaseScanBitmapRange(struct buf *bp ) { + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, 0, 0, 0, 0, 0); + } + + if (bp) { + /* Mark the buffer invalid if it isn't locked, then release it */ + if ((buf_flags(bp) & B_LOCKED) == 0) { + buf_markinvalid(bp); + } + buf_brelse(bp); + } + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0); + } + + return (0); +} + +/* + * @extent.startBlock, on input, contains a preferred block for the + * allocation. @extent.blockCount, on input, contains the minimum + * number of blocks acceptable. Upon success, the result is conveyed + * in @extent. + */ +static OSErr hfs_alloc_try_hard(hfsmount_t *hfsmp, + HFSPlusExtentDescriptor *extent, + uint32_t max_blocks, + hfs_block_alloc_flags_t flags) +{ + OSErr err = dskFulErr; + + const uint32_t min_blocks = extent->blockCount; + + // It's > rather than >= because the last block is always reserved + if (extent->startBlock > 0 && extent->startBlock < hfsmp->allocLimit + && hfsmp->allocLimit - extent->startBlock > max_blocks) { + /* + * This is just checking to see if there's an extent starting + * at extent->startBlock that will suit. We only check for + * @max_blocks here; @min_blocks is ignored. + */ + + err = BlockFindContiguous(hfsmp, extent->startBlock, extent->startBlock + max_blocks, + max_blocks, max_blocks, true, true, + &extent->startBlock, &extent->blockCount, flags); + + if (err != dskFulErr) + return err; + } + + err = BlockFindKnown(hfsmp, max_blocks, &extent->startBlock, + &extent->blockCount); + + if (!err) { + if (extent->blockCount >= max_blocks) + return 0; + } else if (err != dskFulErr) + return err; + + // Try a more exhaustive search + return BlockFindContiguous(hfsmp, 1, hfsmp->allocLimit, + min_blocks, max_blocks, + /* useMetaZone: */ true, + /* trustSummary: */ true, + &extent->startBlock, &extent->blockCount, flags); +} + +/* +_______________________________________________________________________ + +Routine: BlockFindContig + +Function: Find a contiguous group of allocation blocks. If the + minimum cannot be satisfied, nothing is returned. The + caller guarantees that there are enough free blocks + (though they may not be contiguous, in which case this + call will fail). 
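+
+		For example, a caller passing minBlocks == 8 and maxBlocks == 32
+		succeeds only if at least 8 contiguous free blocks can be found,
+		and is handed back at most 32 of them.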
+ +Inputs: + vcb Pointer to volume where space is to be allocated + startingBlock Preferred first block for allocation + minBlocks Minimum number of contiguous blocks to allocate + maxBlocks Maximum number of contiguous blocks to allocate + flags + +Outputs: + actualStartBlock First block of range allocated, or 0 if error + actualNumBlocks Number of blocks allocated, or 0 if error +_______________________________________________________________________ +*/ +static OSErr BlockFindContig( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) +{ + OSErr retval = noErr; + uint32_t currentStart = startingBlock; + + uint32_t foundStart = 0; // values to emit to caller + uint32_t foundCount = 0; + + uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this + uint32_t collision_count = 0; + + int err; + int allowReuse = (flags & HFS_ALLOC_FLUSHTXN); + Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); + + int recently_deleted = 0; + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); + + while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) { + + /* Try and find something that works. */ + + /* + * NOTE: If the only contiguous free extent of at least minBlocks + * crosses startingBlock (i.e. starts before, ends after), then we + * won't find it. Earlier versions *did* find this case by letting + * the second search look past startingBlock by minBlocks. But + * with the free extent cache, this can lead to duplicate entries + * in the cache, causing the same blocks to be allocated twice. + */ + retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, true, &foundStart, &foundCount, flags); + + if (retval == dskFulErr && currentStart != 0) { + /* + * We constrain the endingBlock so we don't bother looking for ranges + * that would overlap those found in the previous call, if the summary bitmap + * is not on for this volume. If it is, then we assume that it was not trust + * -worthy and do a full scan. + */ + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags); + } + else { + retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount, flags); + } + } + + if (retval != noErr) { + goto bailout; + } + + /* Do we overlap with the recently found collision extent? */ + if (collision_start) { + if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) { + /* + * We've looped around, and the only thing we could use was the collision extent. + * Since we are allowed to use it, go ahead and do so now. + */ + if(allowReuse) { + /* + * then we couldn't find anything except values which might have been + * recently deallocated. just return our cached value if we are allowed to. + */ + foundStart = collision_start; + foundCount = collision_count; + goto bailout; + } + else { + /* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */ + retval = dskFulErr; + goto bailout; + } + } + } + + /* OK, we know we must not have collided . 
See if this one is recently deleted */ + if (hfsmp->jnl) { + recently_deleted = 0; + uint32_t nextStart; + err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart, + (uint64_t) foundCount, &recently_deleted, &nextStart); + if (err == 0) { + if(recently_deleted != 0) { + /* + * these blocks were recently deleted/deallocated. Cache the extent, but + * but keep searching to see if we can find one that won't collide here. + */ + if (collision_start == 0) { + collision_start = foundStart; + collision_count = foundCount; + } + recently_deleted = 0; + + /* + * advance currentStart to the point just past the overlap we just found. Note that + * we will automatically loop around to start of the bitmap as needed. + */ + currentStart = nextStart; + /* Unset foundStart/Count to allow it to loop around again. */ + foundStart = 0; + foundCount = 0; + } + } + } // end jnl/deleted case + + /* + * If we found something good, we'd break out of the loop at the top; foundCount + * and foundStart should be set. + */ + + } // end while loop. + +bailout: + + if (retval == noErr) { + *actualStartBlock = foundStart; + *actualNumBlocks = foundCount; + } + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_FIND_CONTIG_BITMAP | DBG_FUNC_END, foundStart, foundCount, retval, 0, 0); + + return retval; + +} + + +/* +_______________________________________________________________________ + +Routine: BlockFindAny + +Function: Find one or more allocation blocks and may return fewer than + requested. The caller guarantees that there is at least one + free block. + +Inputs: + vcb Pointer to volume where space is to be allocated + startingBlock Preferred first block for allocation + endingBlock Last block to check + 1 + maxBlocks Maximum number of contiguous blocks to allocate + useMetaZone + +Outputs: + actualStartBlock First block of range allocated, or 0 if error + actualNumBlocks Number of blocks allocated, or 0 if error +_______________________________________________________________________ +*/ + +static OSErr BlockFindAny( + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t endingBlock, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) +{ + + /* + * If it is enabled, scan through the summary table to find the first free block. + * + * If it reports that there are not any free blocks, we could have a false + * positive, so in that case, use the input arguments as a pass through. + */ + uint32_t start_blk = startingBlock; + uint32_t end_blk = endingBlock; + struct hfsmount *hfsmp; + OSErr err; + + hfsmp = (struct hfsmount*)vcb; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t suggested_start; + + /* + * If the summary table is enabled, scan through it to find the first free + * block. If there was an error, or we couldn't find anything free in the + * summary table, then just leave the start_blk fields unmodified. We wouldn't + * have gotten to this point if the mount point made it look like there was possibly + * free space in the FS. + */ + err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start); + if (err == 0) { + start_blk = suggested_start; + } + else { + /* Differentiate between ENOSPC and a more esoteric error in the above call. */ + if ((err == ENOSPC) && (trustSummary)) { + /* + * The 'trustSummary' argument is for doing a full scan if we really + * really, need the space and we think it's somewhere but can't find it in the + * summary table. 
If it's true, then we trust the summary table and return + * dskFulErr if we couldn't find it above. + */ + return dskFulErr; + } + /* + * If either trustSummary was false or we got a different errno, then we + * want to fall through to the real bitmap single i/o code... + */ + } + } + + err = BlockFindAnyBitmap(vcb, start_blk, end_blk, maxBlocks, + flags, actualStartBlock, actualNumBlocks); + + return err; +} + + +/* + * BlockFindAnyBitmap finds free ranges by scanning the bitmap to + * figure out where the free allocation blocks are. Inputs and + * outputs are the same as for BlockFindAny. + */ + +static OSErr BlockFindAnyBitmap( + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t endingBlock, + u_int32_t maxBlocks, + hfs_block_alloc_flags_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) +{ + OSErr err; + register u_int32_t block = 0; // current block number + register u_int32_t currentWord; // Pointer to current word within bitmap block + register u_int32_t bitMask; // Word with given bits already set (ready to OR in) + register u_int32_t wordsLeft; // Number of words left in this bitmap block + u_int32_t *buffer = NULL; + u_int32_t *currCache = NULL; + uintptr_t blockRef = 0; + u_int32_t bitsPerBlock; + u_int32_t wordsPerBlock; + struct hfsmount *hfsmp = VCBTOHFS(vcb); + Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); + Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0); + +restartSearchAny: + + /* + * When we're skipping the metadata zone and the start/end + * range overlaps with the metadata zone then adjust the + * start to be outside of the metadata zone. If the range + * is entirely inside the metadata zone then we can deny the + * request (dskFulErr). 
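+	 *
+	 * For example, with a metadata zone ending at block 4159, a request
+	 * with startingBlock 100 and endingBlock 8192 is bumped to start at
+	 * block 4160, while the same request with endingBlock 4161 or less
+	 * is denied with dskFulErr.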
+ */ + if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { + if (startingBlock <= vcb->hfs_metazone_end) { + if (endingBlock > (vcb->hfs_metazone_end + 2)) + startingBlock = vcb->hfs_metazone_end + 1; + else { + err = dskFulErr; + goto Exit; + } + } + } + + // Since this routine doesn't wrap around + if (maxBlocks > (endingBlock - startingBlock)) { + maxBlocks = endingBlock - startingBlock; + } + + // + // Pre-read the first bitmap block + // + err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef, flags); + if (err != noErr) goto Exit; + buffer = currCache; + + // + // Set up the current position within the block + // + { + u_int32_t wordIndexInBlock; + + bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; + wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; + + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + buffer += wordIndexInBlock; + wordsLeft = wordsPerBlock - wordIndexInBlock; + currentWord = SWAP_BE32 (*buffer); + bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask); + } + + /* + * While loop 1: + * Find the first unallocated block starting at 'block' + */ + uint32_t summary_block_scan = 0; + + block=startingBlock; + while (block < endingBlock) { + if ((currentWord & bitMask) == 0) + break; + + // Next bit + ++block; + bitMask >>= 1; + if (bitMask == 0) { + // Next word + bitMask = kHighBitInWordMask; + ++buffer; + + if (--wordsLeft == 0) { + // Next block + buffer = currCache = NULL; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* + * If summary_block_scan is non-zero, then we must have + * pulled a bitmap file block into core, and scanned through + * the entire thing. Because we're in this loop, we are + * implicitly trusting that the bitmap didn't have any knowledge + * about this particular block. As a result, update the bitmap + * (lazily, now that we've scanned it) with our findings that + * this particular block is completely used up. + */ + if (summary_block_scan != 0) { + uint32_t summary_bit; + (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit); + hfs_set_summary (hfsmp, summary_bit, 1); + } + } + + err = ReleaseBitmapBlock(vcb, blockRef, false); + if (err != noErr) goto Exit; + + /* + * Skip over metadata blocks. + */ + if (!useMetaZone) { + block = NextBitmapBlock(vcb, block); + } + if (block >= endingBlock) { + err = dskFulErr; + goto Exit; + } + + err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags); + if (err != noErr) goto Exit; + buffer = currCache; + summary_block_scan = block; + wordsLeft = wordsPerBlock; + } + currentWord = SWAP_BE32 (*buffer); + } + } + + // Did we get to the end of the bitmap before finding a free block? + // If so, then couldn't allocate anything. + if (block >= endingBlock) { + err = dskFulErr; + goto Exit; + } + + + /* + * Don't move forward just yet. Verify that either one of the following + * two conditions is true: + * 1) journaling is not enabled + * 2) block is not currently on any pending TRIM list. + */ + if (hfsmp->jnl != NULL && (forceFlush == false)) { + int recently_deleted = 0; + uint32_t nextblk; + err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk); + if ((err == 0) && (recently_deleted)) { + + /* release the bitmap block & unset currCache. we may jump past it. */ + err = ReleaseBitmapBlock(vcb, blockRef, false); + currCache = NULL; + if (err != noErr) { + goto Exit; + } + /* set our start to nextblk, and re-do the search. 
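+			 * For example, if block 5000 is still on a pending TRIM list
+			 * and CheckUnmappedBytes reports that the deleted run ends
+			 * just before block 5128, we resume the scan at block 5128
+			 * rather than forcing a journal flush.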
*/ + startingBlock = nextblk; + goto restartSearchAny; + } + } + + + // Return the first block in the allocated range + *actualStartBlock = block; + + // If we could get the desired number of blocks before hitting endingBlock, + // then adjust endingBlock so we won't keep looking. Ideally, the comparison + // would be (block + maxBlocks) < endingBlock, but that could overflow. The + // comparison below yields identical results, but without overflow. + if (block < (endingBlock-maxBlocks)) { + endingBlock = block + maxBlocks; // if we get this far, we've found enough + } + + /* + * While loop 2: + * Scan the bitmap, starting at 'currentWord' in the current + * bitmap block. Continue iterating through the bitmap until + * either we hit an allocated block, or until we have accumuluated + * maxBlocks worth of bitmap. + */ + + /* Continue until we see an allocated block */ + while ((currentWord & bitMask) == 0) { + // Move to the next block. If no more, then exit. + ++block; + if (block == endingBlock) { + break; + } + + // Next bit + bitMask >>= 1; + if (bitMask == 0) { + // Next word + bitMask = kHighBitInWordMask; + ++buffer; + + if (--wordsLeft == 0) { + // Next block + buffer = currCache = NULL; + + /* We're only reading the bitmap here, so mark it as clean */ + err = ReleaseBitmapBlock(vcb, blockRef, false); + if (err != noErr) { + goto Exit; + } + + /* + * Skip over metadata blocks. + */ + if (!useMetaZone) { + u_int32_t nextBlock; + nextBlock = NextBitmapBlock(vcb, block); + if (nextBlock != block) { + goto Exit; /* allocation gap, so stop */ + } + } + + if (block >= endingBlock) { + goto Exit; + } + + err = ReadBitmapBlock(vcb, block, &currCache, &blockRef, flags); + if (err != noErr) { + goto Exit; + } + buffer = currCache; + wordsLeft = wordsPerBlock; + } + currentWord = SWAP_BE32 (*buffer); + } + } + +Exit: + if (currCache) { + /* Release the bitmap reference prior to marking bits in-use */ + (void) ReleaseBitmapBlock(vcb, blockRef, false); + currCache = NULL; + } + + if (err == noErr) { + *actualNumBlocks = block - *actualStartBlock; + + // sanity check + if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { + panic("hfs: BlockFindAnyBitmap: allocation overflow on \"%s\"", vcb->vcbVN); + } + } + else { + *actualStartBlock = 0; + *actualNumBlocks = 0; + } + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); + + return err; +} + + +/* +_______________________________________________________________________ + +Routine: BlockFindKnown + +Function: Return a potential extent from the free extent cache. The + returned extent *must* be marked allocated and removed + from the cache by the *caller*. 
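+
+		For example, if the largest cached extent is <startBlock 2000,
+		blockCount 64> and maxBlocks is 16, this returns <2000, 16> and
+		leaves the cache entry itself untouched.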
+ +Inputs: + vcb Pointer to volume where space is to be allocated + maxBlocks Maximum number of contiguous blocks to allocate + +Outputs: + actualStartBlock First block of range allocated, or 0 if error + actualNumBlocks Number of blocks allocated, or 0 if error + +Returns: + dskFulErr Free extent cache is empty +_______________________________________________________________________ +*/ + +static OSErr BlockFindKnown( + ExtendedVCB *vcb, + u_int32_t maxBlocks, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) +{ + OSErr err; + u_int32_t foundBlocks; + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0); + + hfs_lock_mount (hfsmp); + lck_spin_lock(&vcb->vcbFreeExtLock); + if ( vcb->vcbFreeExtCnt == 0 || + vcb->vcbFreeExt[0].blockCount == 0) { + lck_spin_unlock(&vcb->vcbFreeExtLock); + hfs_unlock_mount(hfsmp); + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0); + return dskFulErr; + } + lck_spin_unlock(&vcb->vcbFreeExtLock); + hfs_unlock_mount(hfsmp); + + lck_spin_lock(&vcb->vcbFreeExtLock); + + // Just grab up to maxBlocks of the first (largest) free exent. + *actualStartBlock = vcb->vcbFreeExt[0].startBlock; + foundBlocks = vcb->vcbFreeExt[0].blockCount; + if (foundBlocks > maxBlocks) + foundBlocks = maxBlocks; + *actualNumBlocks = foundBlocks; + + lck_spin_unlock(&vcb->vcbFreeExtLock); + + // sanity check + if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) + { + printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); + err = EIO; + } else + err = 0; + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_FIND_KNOWN | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); + + return err; +} + +/* + * BlockMarkAllocated + * + * This is a wrapper function around the internal calls which will actually mark the blocks + * as in-use. It will mark the blocks in the red-black tree if appropriate. We need to do + * this logic here to avoid callers having to deal with whether or not the red-black tree + * is enabled. + */ + +OSErr BlockMarkAllocated( + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks) +{ + return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks, 0); +} + + +/* +_______________________________________________________________________ + +Routine: BlockMarkAllocatedInternal + +Function: Mark a contiguous group of blocks as allocated (set in the + bitmap). It assumes those bits are currently marked + deallocated (clear in the bitmap). Note that this function + must be called regardless of whether or not the bitmap or + tree-based allocator is used, as all allocations must correctly + be marked on-disk. If the tree-based approach is running, then + this will be done before the node is removed from the tree. 
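+
+			For example, marking blocks 30 through 97 (68 blocks) sets the
+			last two bits of the word containing block 30, then two whole
+			32-bit words, and finally the first two bits of the next word.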
+ +Inputs: + vcb Pointer to volume where space is to be allocated + startingBlock First block number to mark as allocated + numBlocks Number of blocks to mark as allocated +_______________________________________________________________________ +*/ +static +OSErr BlockMarkAllocatedInternal ( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t numBlocks, + hfs_block_alloc_flags_t flags) +{ + OSErr err; + register u_int32_t *currentWord; // Pointer to current word within bitmap block + register u_int32_t wordsLeft; // Number of words left in this bitmap block + register u_int32_t bitMask; // Word with given bits already set (ready to OR in) + u_int32_t firstBit; // Bit index within word of first bit to allocate + u_int32_t numBits; // Number of bits in word to allocate + u_int32_t *buffer = NULL; + uintptr_t blockRef = 0; + u_int32_t bitsPerBlock; + u_int32_t wordsPerBlock; + // XXXdbg + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, flags, 0, 0); + +#if DEBUG + + if (!ISSET(flags, HFS_ALLOC_COMMIT) + || ISSET(flags, HFS_ALLOC_USE_TENTATIVE)) { + struct rl_entry *range; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) { + hfs_assert(rl_overlap(range, startingBlock, + startingBlock + numBlocks - 1) == RL_NOOVERLAP); + } + } + +#endif + + int force_flush = 0; + /* + * Since we are about to mark these bits as in-use + * in the bitmap, decide if we need to alert the caller + * that a journal flush might be appropriate. It's safe to + * poke at the journal pointer here since we MUST have + * called start_transaction by the time this function is invoked. + * If the journal is enabled, then it will have taken the requisite + * journal locks. If it is not enabled, then we have taken + * a shared lock on the global lock. + */ + if (hfsmp->jnl) { + uint32_t ignore; + err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore); + if ((err == 0) && (force_flush)) { + journal_request_immediate_flush (hfsmp->jnl); + } + } + + hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks); + + /* + * Don't make changes to the disk if we're just reserving. Note that + * we could do better in the tentative case because we could, in theory, + * avoid the journal flush above. However, that would mean that we would + * need to catch the callback to stop it incorrectly addding the extent + * to our free cache. + */ + if (ISSET(flags, HFS_ALLOC_LOCKED | HFS_ALLOC_TENTATIVE)) { + err = 0; + goto Exit; + } + + // + // Pre-read the bitmap block containing the first word of allocation + // + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + // + // Initialize currentWord, and wordsLeft. + // + { + u_int32_t wordIndexInBlock; + + bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; + wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; + + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + currentWord = buffer + wordIndexInBlock; + wordsLeft = wordsPerBlock - wordIndexInBlock; + } + + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + // + // If the first block to allocate doesn't start on a word + // boundary in the bitmap, then treat that first word + // specially. 
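+	//
+	// For example, with startingBlock % 32 == 5 and numBlocks == 3:
+	//
+	//	bitMask  = kAllBitsSetInWord >> 5;       // 0x07FFFFFF
+	//	bitMask &= ~(kAllBitsSetInWord >> 8);    // leaves 0x07000000, i.e. bits 5..7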
+ // + + firstBit = startingBlock % kBitsPerWord; + if (firstBit != 0) { + bitMask = kAllBitsSetInWord >> firstBit; // turn off all bits before firstBit + numBits = kBitsPerWord - firstBit; // number of remaining bits in this word + if (numBits > numBlocks) { + numBits = numBlocks; // entire allocation is inside this one word + bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); // turn off bits after last + } +#if DEBUG + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); + } +#endif + *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap + numBlocks -= numBits; // adjust number of blocks left to allocate + + ++currentWord; // move to next word + --wordsLeft; // one less word left in this block + } + + // + // Allocate whole words (32 blocks) at a time. + // + + bitMask = kAllBitsSetInWord; // put this in a register for 68K + while (numBlocks >= kBitsPerWord) { + if (wordsLeft == 0) { + // Read in the next bitmap block + startingBlock += bitsPerBlock; // generate a block number in the next bitmap block + + buffer = NULL; + err = ReleaseBitmapBlock(vcb, blockRef, true); + if (err != noErr) goto Exit; + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + // Readjust currentWord and wordsLeft + currentWord = buffer; + wordsLeft = wordsPerBlock; + } +#if DEBUG + if (*currentWord != 0) { + panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); + } +#endif + *currentWord = SWAP_BE32 (bitMask); + numBlocks -= kBitsPerWord; + + ++currentWord; // move to next word + --wordsLeft; // one less word left in this block + } + + // + // Allocate any remaining blocks. + // + + if (numBlocks != 0) { + bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits + if (wordsLeft == 0) { + // Read in the next bitmap block + startingBlock += bitsPerBlock; // generate a block number in the next bitmap block + + buffer = NULL; + err = ReleaseBitmapBlock(vcb, blockRef, true); + if (err != noErr) goto Exit; + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + currentWord = buffer; + } +#if DEBUG + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!"); + } +#endif + *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap + + // No need to update currentWord or wordsLeft + } + +Exit: + + if (buffer) + (void)ReleaseBitmapBlock(vcb, blockRef, true); + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); + + return err; +} + + +/* + * BlockMarkFree + * + * This is a wrapper function around the internal calls which will actually mark the blocks + * as freed. It will mark the blocks in the red-black tree if appropriate. We need to do + * this logic here to avoid callers having to deal with whether or not the red-black tree + * is enabled. 
+ * + */ +OSErr BlockMarkFree( + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks) +{ + return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); +} + + +/* + * BlockMarkFreeUnused + * + * Scan the bitmap block beyond end of current file system for bits + * that are marked as used. If any of the bits are marked as used, + * this function marks them free. + * + * Note: This was specifically written to mark all bits beyond + * end of current file system during hfs_extendfs(), which makes + * sure that all the new blocks added to the file system are + * marked as free. We expect that all the blocks beyond end of + * current file system are always marked as free, but there might + * be cases where are marked as used. This function assumes that + * the number of blocks marked as used incorrectly are relatively + * small, otherwise this can overflow journal transaction size + * on certain file system configurations (example, large unused + * bitmap with relatively small journal). + * + * Input: + * startingBlock: First block of the range to mark unused + * numBlocks: Number of blocks in the range to mark unused + * + * Returns: zero on success, non-zero on error. + */ +OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t numBlocks) +{ + int error = 0; + struct hfsmount *hfsmp = VCBTOHFS(vcb); + u_int32_t curNumBlocks; + u_int32_t bitsPerBlock; + u_int32_t lastBit; + + /* Use the optimal bitmap I/O size instead of bitmap block size */ + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; + + /* + * First clear any non bitmap allocation block aligned bits + * + * Calculate the first bit in the bitmap block next to + * the bitmap block containing the bit for startingBlock. + * Using this value, we calculate the total number of + * bits to be marked unused from startingBlock to the + * end of bitmap block containing startingBlock. + */ + lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock; + curNumBlocks = lastBit - startingBlock; + if (curNumBlocks > numBlocks) { + curNumBlocks = numBlocks; + } + error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); + if (error) { + return error; + } + startingBlock += curNumBlocks; + numBlocks -= curNumBlocks; + + /* + * Check a full bitmap block for any 'used' bit. If any bit is used, + * mark all the bits only in that bitmap block as free. This ensures + * that we do not write unmodified bitmap blocks and do not + * overwhelm the journal. + * + * The code starts by checking full bitmap block at a time, and + * marks entire bitmap block as free only if any bit in that bitmap + * block is marked as used. In the end, it handles the last bitmap + * block which might be partially full by only checking till the + * caller-specified last bit and if any bit is set, only mark that + * range as free. + */ + while (numBlocks) { + if (numBlocks >= bitsPerBlock) { + curNumBlocks = bitsPerBlock; + } else { + curNumBlocks = numBlocks; + } + if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) { + error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false); + if (error) { + return error; + } + } + startingBlock += curNumBlocks; + numBlocks -= curNumBlocks; + } + + return error; +} + +/* +_______________________________________________________________________ + +Routine: BlockMarkFreeInternal + +Function: Mark a contiguous group of blocks as free (clear in the + bitmap). It assumes those bits are currently marked + allocated (set in the bitmap). 
+ +Inputs: + vcb Pointer to volume where space is to be freed + startingBlock First block number to mark as freed + numBlocks Number of blocks to mark as freed + do_validate If true, validate that the blocks being + deallocated to check if they are within totalBlocks + for current volume and whether they were allocated + before they are marked free. +_______________________________________________________________________ +*/ +static +OSErr BlockMarkFreeInternal( + ExtendedVCB *vcb, + u_int32_t startingBlock_in, + register u_int32_t numBlocks_in, + Boolean do_validate) +{ + OSErr err; + u_int32_t startingBlock = startingBlock_in; + u_int32_t numBlocks = numBlocks_in; + uint32_t unmapStart = startingBlock_in; + uint32_t unmapCount = numBlocks_in; + uint32_t wordIndexInBlock; + u_int32_t *currentWord; // Pointer to current word within bitmap block + u_int32_t wordsLeft; // Number of words left in this bitmap block + u_int32_t bitMask; // Word with given bits already set (ready to OR in) + u_int32_t currentBit; // Bit index within word of current bit to allocate + u_int32_t numBits; // Number of bits in word to allocate + u_int32_t *buffer = NULL; + uintptr_t blockRef = 0; + u_int32_t bitsPerBlock; + u_int32_t wordsPerBlock; + // XXXdbg + struct hfsmount *hfsmp = VCBTOHFS(vcb); + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0); + + /* + * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we + * need to be able to free blocks being relocated during hfs_truncatefs. + */ + if ((do_validate == true) && + (startingBlock + numBlocks > vcb->totalBlocks)) { +#if ALLOC_DEBUG || DEBUG + panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); + __builtin_unreachable(); +#else + printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); + err = EIO; + goto Exit; +#endif + } + + // + // Pre-read the bitmap block containing the first word of allocation + // + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + uint32_t min_unmap = 0, max_unmap = UINT32_MAX; + + // Work out the bounds of any unmap we can send down + struct rl_entry *range; + for (int i = 0; i < 2; ++i) { + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) { + if (range->rl_start < startingBlock + && range->rl_end >= min_unmap) { + min_unmap = range->rl_end + 1; + } + if (range->rl_end >= startingBlock + numBlocks + && range->rl_start < max_unmap) { + max_unmap = range->rl_start; + } + } + } + + // + // Figure out how many bits and words per bitmap block. + // + bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; + wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + + // + // Look for a range of free blocks immediately before startingBlock + // (up to the start of the current bitmap block). Set unmapStart to + // the first free block. 
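+	//
+	// For example, freeing blocks 1000-1015 while blocks 996-999 are
+	// already free moves unmapStart back to 996, so the unmap issued at
+	// the end covers at least 996-1015 (more, if the forward scan further
+	// down finds free blocks after 1015).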
+ // + currentWord = buffer + wordIndexInBlock; + currentBit = startingBlock % kBitsPerWord; + bitMask = kHighBitInWordMask >> currentBit; + while (unmapStart > min_unmap) { + // Move currentWord/bitMask back by one bit + bitMask <<= 1; + if (bitMask == 0) { + if (--currentWord < buffer) + break; + bitMask = kLowBitInWordMask; + } + + if (*currentWord & SWAP_BE32(bitMask)) + break; // Found an allocated block. Stop searching. + --unmapStart; + ++unmapCount; + } + + // + // If the first block to free doesn't start on a word + // boundary in the bitmap, then treat that first word + // specially. + // + + currentWord = buffer + wordIndexInBlock; + wordsLeft = wordsPerBlock - wordIndexInBlock; + currentBit = startingBlock % kBitsPerWord; + if (currentBit != 0) { + bitMask = kAllBitsSetInWord >> currentBit; // turn off all bits before currentBit + numBits = kBitsPerWord - currentBit; // number of remaining bits in this word + if (numBits > numBlocks) { + numBits = numBlocks; // entire allocation is inside this one word + bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last + } + if ((do_validate == true) && + (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { + goto Corruption; + } + *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap + numBlocks -= numBits; // adjust number of blocks left to free + + ++currentWord; // move to next word + --wordsLeft; // one less word left in this block + } + + // + // Free whole words (32 blocks) at a time. + // + + while (numBlocks >= kBitsPerWord) { + if (wordsLeft == 0) { + // Read in the next bitmap block + startingBlock += bitsPerBlock; // generate a block number in the next bitmap block + + buffer = NULL; + err = ReleaseBitmapBlock(vcb, blockRef, true); + if (err != noErr) goto Exit; + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + // Readjust currentWord and wordsLeft + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + if ((do_validate == true) && + (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) { + goto Corruption; + } + *currentWord = 0; // clear the entire word + numBlocks -= kBitsPerWord; + + ++currentWord; // move to next word + --wordsLeft; // one less word left in this block + } + + // + // Free any remaining blocks. + // + + if (numBlocks != 0) { + bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits + if (wordsLeft == 0) { + // Read in the next bitmap block + startingBlock += bitsPerBlock; // generate a block number in the next bitmap block + + buffer = NULL; + err = ReleaseBitmapBlock(vcb, blockRef, true); + if (err != noErr) goto Exit; + + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_RESERVED); + if (err != noErr) goto Exit; + + // XXXdbg + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + } + + currentWord = buffer; + } + if ((do_validate == true) && + (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { + goto Corruption; + } + *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap + + // No need to update currentWord or wordsLeft + } + + // + // Look for a range of free blocks immediately after the range we just freed + // (up to the end of the current bitmap block). 
+ // + wordIndexInBlock = ((startingBlock_in + numBlocks_in - 1) & (bitsPerBlock-1)) / kBitsPerWord; + wordsLeft = wordsPerBlock - wordIndexInBlock; + currentWord = buffer + wordIndexInBlock; + currentBit = (startingBlock_in + numBlocks_in - 1) % kBitsPerWord; + bitMask = kHighBitInWordMask >> currentBit; + while (unmapStart + unmapCount < max_unmap) { + // Move currentWord/bitMask/wordsLeft forward one bit + bitMask >>= 1; + if (bitMask == 0) { + if (--wordsLeft == 0) + break; + ++currentWord; + bitMask = kHighBitInWordMask; + } + + if (*currentWord & SWAP_BE32(bitMask)) + break; // Found an allocated block. Stop searching. + ++unmapCount; + } + +Exit: + + if (buffer) + (void)ReleaseBitmapBlock(vcb, blockRef, true); + + if (err == noErr) { + hfs_unmap_free_extent(vcb, unmapStart, unmapCount); + } + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); + + return err; + +Corruption: +#if DEBUG + panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); + __builtin_unreachable(); +#else + printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks on volume %s <%u, %u>\n", + vcb->vcbVN, startingBlock_in, numBlocks_in); + hfs_mark_inconsistent(vcb, HFS_INCONSISTENCY_DETECTED); + err = EIO; + goto Exit; +#endif +} + + +/* +_______________________________________________________________________ + +Routine: BlockFindContiguous + +Function: Find a contiguous range of blocks that are free (bits + clear in the bitmap). If a contiguous range of the + minimum size can't be found, an error will be returned. + This is only needed to support the bitmap-scanning logic, + as the red-black tree should be able to do this by internally + searching its tree. + +Inputs: + vcb Pointer to volume where space is to be allocated + startingBlock Preferred first block of range + endingBlock Last possible block in range + 1 + minBlocks Minimum number of blocks needed. Must be > 0. + maxBlocks Maximum (ideal) number of blocks desired + useMetaZone OK to dip into metadata allocation zone + +Outputs: + actualStartBlock First block of range found, or 0 if error + actualNumBlocks Number of blocks found, or 0 if error + +Returns: + noErr Found at least minBlocks contiguous + dskFulErr No contiguous space found, or all less than minBlocks +_______________________________________________________________________ +*/ + +static OSErr BlockFindContiguous( + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + Boolean useMetaZone, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks, + hfs_block_alloc_flags_t flags) +{ + OSErr err; + register u_int32_t currentBlock; // Block we're currently looking at. + u_int32_t firstBlock; // First free block in current extent. + u_int32_t stopBlock; // If we get to this block, stop searching for first free block. + u_int32_t foundBlocks; // Number of contiguous free blocks in current extent. 
+ u_int32_t *buffer = NULL; + register u_int32_t *currentWord; + register u_int32_t bitMask; + register u_int32_t wordsLeft; + register u_int32_t tempWord; + uintptr_t blockRef = 0; + u_int32_t wordsPerBlock; + u_int32_t updated_free_extent = 0; + struct hfsmount *hfsmp = (struct hfsmount*) vcb; + HFSPlusExtentDescriptor best = { 0, 0 }; + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0); + + /* + * When we're skipping the metadata zone and the start/end + * range overlaps with the metadata zone then adjust the + * start to be outside of the metadata zone. If the range + * is entirely inside the metadata zone then we can deny the + * request (dskFulErr). + */ + if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) { + if (startingBlock <= vcb->hfs_metazone_end) { + if (endingBlock > (vcb->hfs_metazone_end + 2)) + startingBlock = vcb->hfs_metazone_end + 1; + else + goto DiskFull; + } + } + + if ((endingBlock - startingBlock) < minBlocks) + { + // The set of blocks we're checking is smaller than the minimum number + // of blocks, so we couldn't possibly find a good range. + goto DiskFull; + } + + stopBlock = endingBlock - minBlocks + 1; + currentBlock = startingBlock; + firstBlock = 0; + + /* + * Skip over metadata blocks. + */ + if (!useMetaZone) + currentBlock = NextBitmapBlock(vcb, currentBlock); + + /* + * Use the summary table if we can. Skip over any totally + * allocated blocks. currentBlock should now point to the first + * block beyond the metadata zone if the metazone allocations are not + * allowed in this invocation. + */ + if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t suggestion; + err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion); + if (err && err != ENOSPC) + goto ErrorExit; + if (err == ENOSPC || suggestion >= stopBlock) + goto DiskFull; + currentBlock = suggestion; + } + + + // + // Pre-read the first bitmap block. + // + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); + if ( err != noErr ) goto ErrorExit; + + // + // Figure out where currentBlock is within the buffer. + // + wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; + + wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer + currentWord = buffer + wordsLeft; + wordsLeft = wordsPerBlock - wordsLeft; + + uint32_t remaining = (hfsmp->freeBlocks - hfsmp->lockedBlocks + - (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE) + ? 0 : hfsmp->tentativeBlocks)); + + /* + * This outer do-while loop is the main body of this function. Its job is + * to search through the blocks (until we hit 'stopBlock'), and iterate + * through swaths of allocated bitmap until it finds free regions. + */ + + do + { + foundBlocks = 0; + /* + * We will try and update the summary table as we search + * below. Note that we will never update the summary table + * for the first and last blocks that the summary table + * covers. Ideally, we should, but the benefits probably + * aren't that significant so we leave things alone for now. + */ + uint32_t summary_block_scan = 0; + /* + * Inner while loop 1: + * Look for free blocks, skipping over allocated ones. + * + * Initialization starts with checking the initial partial word + * if applicable. 
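+		 *
+		 * For example, if currentBlock % 32 == 7, only bits 7..31 of the
+		 * current word are tested here; if they are all set we fall
+		 * through to the whole-word scan below.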
+ */ + bitMask = currentBlock & kBitsWithinWordMask; + if (bitMask) + { + tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once + bitMask = kHighBitInWordMask >> bitMask; + while (tempWord & bitMask) + { + bitMask >>= 1; + ++currentBlock; + } + + // Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? + if (bitMask) + goto FoundUnused; + + // Didn't find any unused bits, so we're done with this word. + ++currentWord; + --wordsLeft; + } + + // + // Check whole words + // + while (currentBlock < stopBlock) + { + // See if it's time to read another block. + if (wordsLeft == 0) + { + buffer = NULL; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* + * If summary_block_scan is non-zero, then we must have + * pulled a bitmap file block into core, and scanned through + * the entire thing. Because we're in this loop, we are + * implicitly trusting that the bitmap didn't have any knowledge + * about this particular block. As a result, update the bitmap + * (lazily, now that we've scanned it) with our findings that + * this particular block is completely used up. + */ + if (summary_block_scan != 0) { + uint32_t summary_bit; + (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit); + hfs_set_summary (hfsmp, summary_bit, 1); + } + } + err = ReleaseBitmapBlock(vcb, blockRef, false); + if (err != noErr) goto ErrorExit; + + /* + * Skip over metadata blocks. + */ + if (!useMetaZone) { + currentBlock = NextBitmapBlock(vcb, currentBlock); + if (currentBlock >= stopBlock) { + goto LoopExit; + } + } + + /* Skip over fully allocated bitmap blocks if we can */ + if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t suggestion; + err = hfs_find_summary_free (hfsmp, currentBlock, &suggestion); + if (err && err != ENOSPC) + goto ErrorExit; + if (err == ENOSPC || suggestion >= stopBlock) + goto LoopExit; + currentBlock = suggestion; + } + + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); + if ( err != noErr ) goto ErrorExit; + + /* + * Set summary_block_scan to be the block we just read into the block cache. + * + * At this point, we've just read an allocation block worth of bitmap file + * into the buffer above, but we don't know if it is completely allocated or not. + * If we find that it is completely allocated/full then we will jump + * through this loop again and set the appropriate summary bit as fully allocated. + */ + summary_block_scan = currentBlock; + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + + // See if any of the bits are clear + if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear + { + // Figure out which bit is clear + bitMask = kHighBitInWordMask; + while (tempWord & bitMask) + { + bitMask >>= 1; + ++currentBlock; + } + + break; // Found the free bit; break out to FoundUnused. + } + + // Keep looking at the next word + currentBlock += kBitsPerWord; + ++currentWord; + --wordsLeft; + } + +FoundUnused: + // Make sure the unused bit is early enough to use + if (currentBlock >= stopBlock) + { + break; + } + + // Remember the start of the extent + firstBlock = currentBlock; + + + /* + * Inner while loop 2: + * We get here if we find a free block. Count the number + * of contiguous free blocks observed. + * + * Initialization starts with checking the initial partial word + * if applicable. 
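+		 *
+		 * The count is capped at maxBlocks: once (currentBlock - firstBlock)
+		 * reaches maxBlocks there is no point scanning this extent any
+		 * further.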
+ */ + bitMask = currentBlock & kBitsWithinWordMask; + if (bitMask) + { + tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once + bitMask = kHighBitInWordMask >> bitMask; + while (bitMask && !(tempWord & bitMask)) + { + bitMask >>= 1; + ++currentBlock; + } + + // Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)? + if (bitMask) + goto FoundUsed; + + // Didn't find any used bits, so we're done with this word. + ++currentWord; + --wordsLeft; + } + + // + // Check whole words + // + while (currentBlock < endingBlock) + { + // See if it's time to read another block. + if (wordsLeft == 0) + { + buffer = NULL; + err = ReleaseBitmapBlock(vcb, blockRef, false); + if (err != noErr) goto ErrorExit; + + /* + * Skip over metadata blocks. + */ + if (!useMetaZone) { + u_int32_t nextBlock; + + nextBlock = NextBitmapBlock(vcb, currentBlock); + if (nextBlock != currentBlock) { + goto LoopExit; /* allocation gap, so stop */ + } + } + + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef, flags); + if ( err != noErr ) goto ErrorExit; + + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + + // See if any of the bits are set + if ((tempWord = SWAP_BE32(*currentWord)) != 0) + { + // Figure out which bit is set + bitMask = kHighBitInWordMask; + while (!(tempWord & bitMask)) + { + bitMask >>= 1; + ++currentBlock; + } + + break; // Found the used bit; break out to FoundUsed. + } + + // Keep looking at the next word + currentBlock += kBitsPerWord; + ++currentWord; + --wordsLeft; + + // If we found at least maxBlocks, we can quit early. + if ((currentBlock - firstBlock) >= maxBlocks) + break; + } + +FoundUsed: + // Make sure we didn't run out of bitmap looking for a used block. + // If so, pin to the end of the bitmap. + if (currentBlock > endingBlock) + currentBlock = endingBlock; + + // Figure out how many contiguous free blocks there were. + // Pin the answer to maxBlocks. + foundBlocks = currentBlock - firstBlock; + if (foundBlocks > maxBlocks) + foundBlocks = maxBlocks; + + if (remaining) { + if (foundBlocks > remaining) { + hfs_debug("hfs: found more blocks than are indicated free!\n"); + remaining = UINT32_MAX; + } else + remaining -= foundBlocks; + } + + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + if (foundBlocks > best.blockCount) { + best.startBlock = firstBlock; + best.blockCount = foundBlocks; + } + + if (foundBlocks >= maxBlocks || best.blockCount >= remaining) + break; + + /* + * Note that we will go ahead and add this free extent to our + * cache below but that's OK because we'll remove it again if we + * decide to use this extent. + */ + } else if (foundBlocks >= minBlocks) + break; // Found what we needed! + + /* + * We did not find the total blocks we were looking for, but + * add this free block run to our free extent cache list, if possible. + */ + + // If we're ignoring tentative ranges, we need to account for them here + if (ISSET(flags, HFS_ALLOC_IGNORE_TENTATIVE)) { + struct rl_entry free_extent = rl_make(firstBlock, firstBlock + foundBlocks - 1); + struct rl_entry *range;; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], rl_link) { + rl_subtract(&free_extent, range); + if (rl_len(range) == 0) + break; + } + firstBlock = free_extent.rl_start; + foundBlocks = rl_len(&free_extent); + } + + if (foundBlocks) { + if (hfsmp->jnl == NULL) { + /* If there is no journal, go ahead and add to the free ext cache. 
*/ + updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + } + else { + /* + * If journaled, only add to the free extent cache if this block is not + * waiting for a TRIM to complete; that implies that the transaction that freed it + * has not yet been committed to stable storage. + */ + int recently_deleted = 0; + uint32_t nextblock; + err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, + (uint64_t)foundBlocks, &recently_deleted, &nextblock); + if ((err) || (recently_deleted == 0)) { + /* if we hit an error, or the blocks not recently freed, go ahead and insert it */ + updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + } + } + } + } while (currentBlock < stopBlock); +LoopExit: + + if (ISSET(flags, HFS_ALLOC_TRY_HARD)) { + firstBlock = best.startBlock; + foundBlocks = best.blockCount; + } + + // Return the outputs. + if (foundBlocks < minBlocks) + { +DiskFull: + err = dskFulErr; +ErrorExit: + *actualStartBlock = 0; + *actualNumBlocks = 0; + } + else + { + err = noErr; + *actualStartBlock = firstBlock; + *actualNumBlocks = foundBlocks; + /* + * Sanity check for overflow + */ + if ((firstBlock + foundBlocks) > vcb->allocLimit) { + panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", + vcb->vcbVN, startingBlock, endingBlock, currentBlock, + firstBlock, stopBlock, minBlocks, foundBlocks); + } + } + + if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { + int i; + u_int32_t min_start = vcb->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + lck_spin_lock(&vcb->vcbFreeExtLock); + for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) { + if (vcb->vcbFreeExt[i].startBlock < min_start) { + min_start = vcb->vcbFreeExt[i].startBlock; + } + } + lck_spin_unlock(&vcb->vcbFreeExtLock); + if (min_start != vcb->totalBlocks) { + if (min_start < vcb->nextAllocation) { + vcb->nextAllocation = min_start; + } + if (min_start < vcb->sparseAllocation) { + vcb->sparseAllocation = min_start; + } + } + } + + if (buffer) + (void) ReleaseBitmapBlock(vcb, blockRef, false); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); + + return err; +} + + +/* + * Count number of bits set in the given 32-bit unsigned number + * + * Returns: + * Number of bits set + */ +static int num_bits_set(u_int32_t num) +{ + int count; + + for (count = 0; num; count++) { + num &= num - 1; + } + + return count; +} + +/* + * For a given range of blocks, find the total number of blocks + * allocated. If 'stop_on_first' is true, it stops as soon as it + * encounters the first allocated block. This option is useful + * to determine if any block is allocated or not. + * + * Inputs: + * startingBlock First allocation block number of the range to be scanned. + * numBlocks Total number of blocks that need to be scanned. + * stop_on_first Stop the search after the first allocated block is found. + * + * Output: + * allocCount Total number of allocation blocks allocated in the given range. + * + * On error, it is the number of allocated blocks found + * before the function got an error. + * + * If 'stop_on_first' is set, + * allocCount = 1 if any allocated block was found. + * allocCount = 0 if no allocated block was found. + * + * Returns: + * 0 on success, non-zero on failure. 
+ */ +static int +hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, + u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount) +{ + u_int32_t *currentWord; // Pointer to current word within bitmap block + u_int32_t wordsLeft; // Number of words left in this bitmap block + u_int32_t bitMask; // Word with given bits already set (ready to test) + u_int32_t firstBit; // Bit index within word of first bit to allocate + u_int32_t numBits; // Number of bits in word to allocate + u_int32_t *buffer = NULL; + uintptr_t blockRef; + u_int32_t bitsPerBlock; + u_int32_t wordsPerBlock; + u_int32_t blockCount = 0; + int error; + + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0); + + /* + * Pre-read the bitmap block containing the first word of allocation + */ + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); + if (error) + goto JustReturn; + + /* + * Initialize currentWord, and wordsLeft. + */ + { + u_int32_t wordIndexInBlock; + + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; + wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; + + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + currentWord = buffer + wordIndexInBlock; + wordsLeft = wordsPerBlock - wordIndexInBlock; + } + + /* + * First test any non word aligned bits. + */ + firstBit = startingBlock % kBitsPerWord; + if (firstBit != 0) { + bitMask = kAllBitsSetInWord >> firstBit; + numBits = kBitsPerWord - firstBit; + if (numBits > numBlocks) { + numBits = numBlocks; + bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); + } + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + if (stop_on_first) { + blockCount = 1; + goto Exit; + } + blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask)); + } + numBlocks -= numBits; + ++currentWord; + --wordsLeft; + } + + /* + * Test whole words (32 blocks) at a time. + */ + while (numBlocks >= kBitsPerWord) { + if (wordsLeft == 0) { + /* Read in the next bitmap block. */ + startingBlock += bitsPerBlock; + + buffer = NULL; + error = ReleaseBitmapBlock(hfsmp, blockRef, false); + if (error) goto Exit; + + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); + if (error) goto Exit; + + /* Readjust currentWord and wordsLeft. */ + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + if (*currentWord != 0) { + if (stop_on_first) { + blockCount = 1; + goto Exit; + } + blockCount += num_bits_set(*currentWord); + } + numBlocks -= kBitsPerWord; + ++currentWord; + --wordsLeft; + } + + /* + * Test any remaining blocks. 
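+ *
+ * Worked example (hypothetical remainder): with numBlocks == 3, the mask
+ * built below is ~(kAllBitsSetInWord >> 3) == ~0x1FFFFFFF == 0xE0000000,
+ * so only the three high-order bits of the final word get tested.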
+ */ + if (numBlocks != 0) { + bitMask = ~(kAllBitsSetInWord >> numBlocks); + if (wordsLeft == 0) { + /* Read in the next bitmap block */ + startingBlock += bitsPerBlock; + + buffer = NULL; + error = ReleaseBitmapBlock(hfsmp, blockRef, false); + if (error) goto Exit; + + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); + if (error) goto Exit; + + currentWord = buffer; + } + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + if (stop_on_first) { + blockCount = 1; + goto Exit; + } + blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask)); + } + } +Exit: + if (buffer) { + (void)ReleaseBitmapBlock(hfsmp, blockRef, false); + } + if (allocCount) { + *allocCount = blockCount; + } + +JustReturn: + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0); + + return (error); +} + +/* + * Count total number of blocks that are allocated in the given + * range from the bitmap. This is used to preflight total blocks + * that need to be relocated during volume resize. + * + * The journal or allocation file lock must be held. + * + * Returns: + * 0 on success, non-zero on failure. + * On failure, allocCount is zero. + */ + int +hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, + u_int32_t numBlocks, u_int32_t *allocCount) +{ + return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount); +} + +/* + * Test to see if any blocks in a range are allocated. + * + * Note: On error, this function returns 1, which means that + * one or more blocks in the range are allocated. This function + * is primarily used for volume resize and we do not want + * to report to the caller that the blocks are free when we + * were not able to deterministically find it out. So on error, + * we always report that the blocks are allocated. + * + * The journal or allocation file lock must be held. + * + * Returns + * 0 if all blocks in the range are free. + * 1 if blocks in the range are allocated, or there was an error. + */ + int +hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) +{ + int error; + u_int32_t allocCount; + + error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount); + if (error) { + /* On error, we always say that the blocks are allocated + * so that volume resize does not return false success. + */ + return 1; + } else { + /* The function was deterministically able to find out + * if there was any block allocated or not. In that case, + * the value in allocCount is good enough to be returned + * back to the caller. + */ + return allocCount; + } +} + +/* + * CONFIG_HFS_RBTREE + * Check to see if the red-black tree is live. Allocation file lock must be held + * shared or exclusive to call this function. Note that we may call this even if + * HFS is built without activating the red-black tree code. + */ +int +hfs_isrbtree_active(struct hfsmount *hfsmp){ + +#pragma unused (hfsmp) + + /* Just return 0 for now */ + return 0; +} + + + +/* Summary Table Functions */ +/* + * hfs_check_summary: + * + * This function should be used to query the summary table to see if we can + * bypass a bitmap block or not when we're trying to find a free allocation block. + * + * + * Inputs: + * allocblock - allocation block number. Will be used to infer the correct summary bit. + * hfsmp -- filesystem in question. 
+ * + * Output Arg: + * *freeblocks - set to 1 if we believe at least one free blocks in this vcbVBMIOSize + * page of bitmap file. + * + * + * Returns: + * 0 on success + * EINVAL on error + * + */ + +static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) { + + int err = EINVAL; + if (hfsmp->vcbVBMIOSize) { + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t index; + if (hfs_get_summary_index (hfsmp, allocblock, &index)) { + *freeblocks = 0; + return EINVAL; + } + + /* Ok, now that we have the bit index into the array, what byte is it in ? */ + uint32_t byteindex = index / kBitsPerByte; + uint8_t current_byte = hfsmp->hfs_summary_table[byteindex]; + uint8_t bit_in_byte = index % kBitsPerByte; + + if (current_byte & (1 << bit_in_byte)) { + /* + * We do not believe there is anything free in the + * entire vcbVBMIOSize'd block. + */ + *freeblocks = 0; + } + else { + /* Looks like there might be a free block here... */ + *freeblocks = 1; + } + } + err = 0; + } + + return err; +} + + +#if 0 +/* + * hfs_get_next_summary + * + * From a given allocation block, jump to the allocation block at the start of the + * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over + * large swaths of bitmap once we have determined that the bitmap is relatively full. + * + * Inputs: hfsmount, starting allocation block number + * Output Arg: *newblock will contain the allocation block number to start + * querying. + * + * Returns: + * 0 on success + * EINVAL if the block argument is too large to be used, or the summary table not live. + * EFBIG if there are no more summary bits to be queried + */ +static int +hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { + + u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + u_int32_t start_offset; + u_int32_t next_offset; + int err = EINVAL; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) { + return err; + } + + next_offset = start_offset++; + + if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) { + /* Can't jump to the next summary bit. */ + return EINVAL; + } + + /* Otherwise, compute and return */ + *newblock = next_offset * bits_per_iosize; + if (*newblock >= hfsmp->totalBlocks) { + return EINVAL; + } + err = 0; + } + + return err; +} + +#endif + +/* + * hfs_release_summary + * + * Given an extent that is about to be de-allocated on-disk, determine the number + * of summary bitmap bits that need to be marked as 'potentially available'. + * Then go ahead and mark them as free. + * + * Inputs: + * hfsmp - hfs mount + * block - starting allocation block. + * length - length of the extent. + * + * Returns: + * EINVAL upon any errors. 
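+ *
+ * Illustrative call (a sketch only; the block numbers are hypothetical):
+ *
+ *	// A 100-block extent starting at block 50000 was just deallocated on
+ *	// disk, so mark every summary bit covering it as potentially free.
+ *	(void) hfs_release_summary(hfsmp, 50000, 100);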
+ */ +static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) { + int err = EINVAL; + uint32_t end_blk = (start_blk + length) - 1; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Figure out what the starting / ending block's summary bits are */ + uint32_t start_bit; + uint32_t end_bit; + uint32_t current_bit; + + err = hfs_get_summary_index (hfsmp, start_blk, &start_bit); + if (err) { + goto release_err; + } + err = hfs_get_summary_index (hfsmp, end_blk, &end_bit); + if (err) { + goto release_err; + } + + if (ALLOC_DEBUG) { + if (start_bit > end_bit) { + panic ("HFS: start > end!, %d %d ", start_bit, end_bit); + } + } + current_bit = start_bit; + while (current_bit <= end_bit) { + err = hfs_set_summary (hfsmp, current_bit, 0); + current_bit++; + } + } + +release_err: + return err; +} + +/* + * hfs_find_summary_free + * + * Given a allocation block as input, returns an allocation block number as output as a + * suggestion for where to start scanning the bitmap in order to find free blocks. It will + * determine the vcbVBMIOsize of the input allocation block, convert that into a summary + * bit, then keep iterating over the summary bits in order to find the first free one. + * + * Inputs: + * hfsmp - hfs mount + * block - starting allocation block + * newblock - output block as suggestion + * + * Returns: + * 0 on success + * ENOSPC if we could not find a free block + */ + +int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { + + int err = ENOSPC; + uint32_t bit_index = 0; + uint32_t maybe_has_blocks = 0; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t byte_index; + uint8_t curbyte; + uint8_t bit_in_byte; + uint32_t summary_cap; + + /* + * We generate a cap for the summary search because the summary table + * always represents a full summary of the bitmap FILE, which may + * be way more bits than are necessary for the actual filesystem + * whose allocations are mapped by the bitmap. + * + * Compute how much of hfs_summary_size is useable for the given number + * of allocation blocks eligible on this FS. + */ + err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit - 1, &summary_cap); + if (err) { + goto summary_exit; + } + + /* Check the starting block first */ + err = hfs_check_summary (hfsmp, block, &maybe_has_blocks); + if (err) { + goto summary_exit; + } + + if (maybe_has_blocks) { + /* + * It looks like the initial start block could have something. + * Short-circuit and just use that. + */ + *newblock = block; + goto summary_exit; + } + + /* + * OK, now we know that the first block was useless. + * Get the starting summary bit, and find it in the array + */ + maybe_has_blocks = 0; + err = hfs_get_summary_index (hfsmp, block, &bit_index); + if (err) { + goto summary_exit; + } + + /* Iterate until we find something. */ + while (bit_index <= summary_cap) { + byte_index = bit_index / kBitsPerByte; + curbyte = hfsmp->hfs_summary_table[byte_index]; + bit_in_byte = bit_index % kBitsPerByte; + + if (curbyte & (1 << bit_in_byte)) { + /* nothing here. increment and move on */ + bit_index++; + } + else { + /* + * found something! convert bit_index back into + * an allocation block for use. 'newblock' will now + * contain the proper allocation block # based on the bit + * index. 
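+ * (Worked example with a hypothetical 4K vcbVBMIOSize: one summary bit
+ * spans 4096 * 8 == 32768 allocation blocks, so bit_index 3 converts
+ * back to allocation block 3 * 32768 == 98304.)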
+ */ + err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock); + if (err) { + goto summary_exit; + } + maybe_has_blocks = 1; + break; + } + } + + /* If our loop didn't find anything, set err to ENOSPC */ + if (maybe_has_blocks == 0) { + err = ENOSPC; + } + } + + /* If the summary table is not active for this mount, we'll just return ENOSPC */ +summary_exit: + if (maybe_has_blocks) { + err = 0; + } + + return err; +} + +/* + * hfs_get_summary_allocblock + * + * Convert a summary bit into an allocation block number to use to start searching for free blocks. + * + * Inputs: + * hfsmp - hfs mount + * summarybit - summmary bit index + * *alloc - allocation block number in the bitmap file. + * + * Output: + * 0 on success + * EINVAL on failure + */ +int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t + summarybit, uint32_t *alloc) { + uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + uint32_t allocblk; + + allocblk = summarybit * bits_per_iosize; + + if (allocblk >= hfsmp->totalBlocks) { + return EINVAL; + } + else { + *alloc = allocblk; + } + + return 0; +} + + +/* + * hfs_set_summary: + * + * This function should be used to manipulate the summary table + * + * The argument 'inuse' will set the value of the bit in question to one or zero + * depending on its value. + * + * Inputs: + * hfsmp - hfs mount + * summarybit - the bit index into the summary table to set/unset. + * inuse - the value to assign to the bit. + * + * Returns: + * 0 on success + * EINVAL on error + * + */ + +static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) { + + int err = EINVAL; + if (hfsmp->vcbVBMIOSize) { + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("hfs_set_summary: no table for %p ", hfsmp); + } + } + + /* Ok, now that we have the bit index into the array, what byte is it in ? */ + uint32_t byte_index = summarybit / kBitsPerByte; + uint8_t current_byte = hfsmp->hfs_summary_table[byte_index]; + uint8_t bit_in_byte = summarybit % kBitsPerByte; + + if (inuse) { + current_byte = (current_byte | (1 << bit_in_byte)); + } + else { + current_byte = (current_byte & ~(1 << bit_in_byte)); + } + + hfsmp->hfs_summary_table[byte_index] = current_byte; + } + err = 0; + } + + return err; +} + + +/* + * hfs_get_summary_index: + * + * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth + * of IO against the bitmap file. + * + * Returns: + * 0 on success + * EINVAL on failure + */ +static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) { + uint32_t summary_bit; + uint32_t bits_per_iosize; + int err = EINVAL; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Is the input block bigger than the total number of blocks? */ + if (block >= hfsmp->totalBlocks) { + return EINVAL; + } + + /* Is there even a vbmIOSize set? */ + if (hfsmp->vcbVBMIOSize == 0) { + return EINVAL; + } + + bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + + summary_bit = block / bits_per_iosize; + + *index = summary_bit; + err = 0; + } + + return err; +} + +/* + * hfs_init_summary + * + * From a given mount structure, compute how big the summary table should be for the given + * filesystem, then allocate and bzero the memory. 
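+ *
+ * Worked sizing example (hypothetical geometry): with a bitmap file of 1024
+ * allocation blocks, an 8192-byte allocation block size and a 4096-byte
+ * vcbVBMIOSize, the table needs (1024 * 8192) / 4096 == 2048 summary bits,
+ * which, with the one byte of slop added below, is 2048 / 8 + 1 == 257 bytes.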
+ * + * Returns: + * 0 on success + * EINVAL on failure + */ +int +hfs_init_summary (struct hfsmount *hfsmp) { + + uint32_t summary_size; + uint32_t summary_size_bytes; + uint8_t *summary_table; + + if (hfsmp->hfs_allocation_cp == NULL) { + if (ALLOC_DEBUG) { + printf("hfs: summary table cannot progress without a bitmap cnode! \n"); + } + return EINVAL; + } + /* + * The practical maximum size of the summary table is 16KB: + * + * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte. + * + * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but + * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes + * supporting this much worse. The math would instead look like this: + * (512MB / 2k) / 8 == 32k. + * + * So, we will disallow the summary table if the allocation block size is < 4k. + */ + + if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) { + printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize); + return EINVAL; + } + + summary_size = hfsmp->hfs_allocation_cp->c_blocks; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table Initialization: Bitmap %u blocks\n", + hfsmp->hfs_allocation_cp->c_blocks); + } + + /* + * If the bitmap IO size is not the same as the allocation block size then + * then re-compute the number of summary bits necessary. Note that above, the + * the default size is the number of allocation blocks in the bitmap *FILE* + * (not the number of bits in the bitmap itself). If the allocation block size + * is large enough though, we may need to increase this. + */ + if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { + uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; + lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; + + /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ + summary_size = (uint32_t) lrg_size; + } + + /* + * If the block size is the same as the IO Size, then the total number of blocks + * is already equal to the number of IO units, which is our number of summary bits. + */ + + summary_size_bytes = summary_size / kBitsPerByte; + /* Always add one byte, just in case we have a dangling number of bits */ + summary_size_bytes++; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size); + printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes); + } + + /* Store the field in the mount point */ + hfsmp->hfs_summary_size = summary_size; + hfsmp->hfs_summary_bytes = summary_size_bytes; + + summary_table = hfs_mallocz(summary_size_bytes); + + /* enable the summary table */ + hfsmp->hfs_flags |= HFS_SUMMARY_TABLE; + hfsmp->hfs_summary_table = summary_table; + + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("HFS Summary Init: no table for %p\n", hfsmp); + } + } + return 0; +} + +/* + * hfs_rebuild_summary + * + * This function should be used to allocate a new hunk of memory for use as a summary + * table, then copy the existing data into it. We use it whenever the filesystem's size + * changes. When a resize is in progress, you can still use the extant summary + * table if it is active. + * + * Inputs: + * hfsmp -- FS in question + * newlength -- new length of the FS in allocation blocks. + * + * Outputs: + * 0 on success, EINVAL on failure. If this function fails, the summary table + * will be disabled for future use. 
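+ *
+ * Illustrative call site (this mirrors UpdateAllocLimit() further below,
+ * which is where a resize lands after changing how many blocks the bitmap
+ * has to describe):
+ *
+ *	hfsmp->allocLimit = new_end_block;
+ *	(void) hfs_rebuild_summary(hfsmp);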
+ * + */ +static int hfs_rebuild_summary (struct hfsmount *hfsmp) { + + uint32_t new_summary_size; + + new_summary_size = hfsmp->hfs_allocation_cp->c_blocks; + + + if (ALLOC_DEBUG) { + printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size); + } + + /* + * If the bitmap IO size is not the same as the allocation block size, then re-compute + * the number of summary bits necessary. Note that above, the default size is the number + * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages). + * If the allocation block size is large enough though, we may need to increase this, as + * bitmap IO is capped at 64k per IO + */ + if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { + uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; + lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; + + /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ + new_summary_size = (uint32_t) lrg_size; + } + + /* + * Ok, we have the new summary bitmap theoretical max size. See if it's the same as + * what we've got already... + */ + if (new_summary_size != hfsmp->hfs_summary_size) { + uint32_t summarybytes = new_summary_size / kBitsPerByte; + uint32_t copysize; + uint8_t *newtable; + /* Add one byte for slop */ + summarybytes++; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size); + printf("HFS Summary Table Size (in bytes) %d \n", summarybytes); + } + + newtable = hfs_mallocz(summarybytes); + + /* + * The new table may be smaller than the old one. If this is true, then + * we can't copy the full size of the existing summary table into the new + * one. + * + * The converse is not an issue since we bzeroed the table above. + */ + copysize = hfsmp->hfs_summary_bytes; + if (summarybytes < hfsmp->hfs_summary_bytes) { + copysize = summarybytes; + } + memcpy (newtable, hfsmp->hfs_summary_table, copysize); + + /* We're all good. Destroy the old copy and update ptrs */ + hfs_free(hfsmp->hfs_summary_table, hfsmp->hfs_summary_bytes); + + hfsmp->hfs_summary_table = newtable; + hfsmp->hfs_summary_size = new_summary_size; + hfsmp->hfs_summary_bytes = summarybytes; + } + + return 0; +} + + +#if ALLOC_DEBUG +/* + * hfs_validate_summary + * + * Validation routine for the summary table. Debug-only function. + * + * Bitmap lock must be held. + * + */ +void hfs_validate_summary (struct hfsmount *hfsmp) { + uint32_t i; + int err; + + /* + * Iterate over all of the bits in the summary table, and verify if + * there really are free blocks in the pages that we believe may + * may contain free blocks. + */ + + if (hfsmp->hfs_summary_table == NULL) { + panic ("HFS Summary: No HFS summary table!"); + } + + /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. we add 1 byte for slop */ + if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) { + panic("HFS Summary: Size is bad! 
%d", hfsmp->hfs_summary_size); + } + + if (hfsmp->vcbVBMIOSize == 0) { + panic("HFS Summary: no VCB VBM IO Size !"); + } + + printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN); + printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks); + + + /* iterate through all possible summary bits */ + for (i = 0; i < hfsmp->hfs_summary_size ; i++) { + + uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + uint32_t byte_offset = hfsmp->vcbVBMIOSize * i; + + /* Compute the corresponding allocation block for the summary bit. */ + uint32_t alloc_block = i * bits_per_iosize; + + /* + * We use a uint32_t pointer here because it will speed up + * access to the real bitmap data on disk. + */ + uint32_t *block_data; + struct buf *bp; + int counter; + int counter_max; + int saw_free_bits = 0; + + /* Get the block */ + if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) { + panic ("HFS Summary: error (%d) in ReadBitmapRange!", err); + } + + /* Query the status of the bit and then make sure we match */ + uint32_t maybe_has_free_blocks; + err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks); + if (err) { + panic ("HFS Summary: hfs_check_summary returned error (%d) ", err); + } + counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord; + + for (counter = 0; counter < counter_max; counter++) { + uint32_t word = block_data[counter]; + + /* We assume that we'll not find any free bits here. */ + if (word != kAllBitsSetInWord) { + if (maybe_has_free_blocks) { + /* All done */ + saw_free_bits = 1; + break; + } + else { + panic ("HFS Summary: hfs_check_summary saw free bits!"); + } + } + } + + if (maybe_has_free_blocks && (saw_free_bits == 0)) { + panic ("HFS Summary: did not see free bits !"); + } + + /* Release the block. */ + if ((err = ReleaseScanBitmapRange (bp))) { + panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err); + } + } + + printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN); + + return; +} +#endif + +/* + * hfs_alloc_scan_range: + * + * This function should be used to scan large ranges of the allocation bitmap + * at one time. It makes two key assumptions: + * + * 1) Bitmap lock is held during the duration of the call (exclusive) + * 2) There are no pages in the buffer cache for any of the bitmap + * blocks that we may encounter. It *MUST* be completely empty. + * + * The expected use case is when we are scanning the bitmap in full while we are + * still mounting the filesystem in order to issue TRIMs or build up the summary + * table for the mount point. It should be done after any potential journal replays + * are completed and their I/Os fully issued. + * + * The key reason for assumption (2) above is that this function will try to issue + * I/O against the bitmap file in chunks as large a possible -- essentially as + * much as the buffer layer will handle (1MB). Because the size of these I/Os + * is larger than what would be expected during normal runtime we must invalidate + * the buffers as soon as we are done with them so that they do not persist in + * the buffer cache for other threads to find, as they'll typically be doing + * allocation-block size I/Os instead. + * + * Input Args: + * hfsmp - hfs mount data structure + * startbit - allocation block # to start our scan. It must be aligned + * on a vcbVBMIOsize boundary. 
+ * list - journal trim list data structure for issuing TRIMs + * + * Output Args: + * bitToScan - Return the next bit to scan if this function is called again. + * Caller will supply this into the next invocation + * of this call as 'startbit'. + */ + +static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit, + u_int32_t *bitToScan, struct jnl_trim_list *list) { + + int error; + int readwrite = 1; + u_int32_t curAllocBlock; + struct buf *blockRef = NULL; + u_int32_t *buffer = NULL; + u_int32_t free_offset = 0; //tracks the start of the current free range + u_int32_t size = 0; // tracks the length of the current free range. + u_int32_t iosize = 0; //how much io we should generate against the bitmap + u_int32_t byte_off; // byte offset into the bitmap file. + u_int32_t completed_size; // how much io was actually completed + u_int32_t last_bitmap_block; + u_int32_t current_word; + u_int32_t word_index = 0; + + /* summary table building */ + uint32_t summary_bit = 0; + uint32_t saw_free_blocks = 0; + uint32_t last_marked = 0; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + readwrite = 0; + } + + /* + * Compute how much I/O we should generate here. + * hfs_scan_range_size will validate that the start bit + * converted into a byte offset into the bitmap file, + * is aligned on a VBMIOSize boundary. + */ + error = hfs_scan_range_size (hfsmp, startbit, &iosize); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error); + } + return error; + } + + if (iosize < hfsmp->vcbVBMIOSize) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize); + } + return EINVAL; + } + + /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ + byte_off = startbit / kBitsPerByte; + + /* + * When the journal replays blocks, it does so by writing directly to the disk + * device (bypassing any filesystem vnodes and such). When it finishes its I/Os + * it also immediately re-reads and invalidates the range covered by the bp so + * it does not leave anything lingering in the cache (for iosize reasons). + * + * As such, it is safe to do large I/Os here with ReadBitmapRange. + * + * NOTE: It is not recommended, but it is possible to call the function below + * on sections of the bitmap that may be in core already as long as the pages are not + * dirty. In that case, we'd notice that something starting at that + * logical block of the bitmap exists in the metadata cache, and we'd check + * if the iosize requested is the same as what was already allocated for it. + * Odds are pretty good we're going to request something larger. In that case, + * we just free the existing memory associated with the buf and reallocate a + * larger range. This function should immediately invalidate it as soon as we're + * done scanning, so this shouldn't cause any coherency issues. + */ + + error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error); + } + return error; + } + + /* + * At this point, we have a giant wired buffer that represents some portion of + * the bitmap file that we want to analyze. We may not have gotten all 'iosize' + * bytes though, so clip our ending bit to what we actually read in. 
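+ * (Hypothetical example: if only 524288 of the requested 'iosize' bytes
+ * came back, buf_count() below reports 524288 and this chunk covers
+ * 524288 * 8 == 4194304 allocation blocks starting at 'startbit'.)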
+ */ + completed_size = buf_count(blockRef); + last_bitmap_block = completed_size * kBitsPerByte; + last_bitmap_block = last_bitmap_block + startbit; + + /* Cap the last block to the total number of blocks if required */ + if (last_bitmap_block > hfsmp->totalBlocks) { + last_bitmap_block = hfsmp->totalBlocks; + } + + /* curAllocBlock represents the logical block we're analyzing. */ + curAllocBlock = startbit; + word_index = 0; + size = 0; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) { + error = EINVAL; + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit); + } + return error; + } + /* + * summary_bit should now be set to the summary bit corresponding to + * the allocation block of the first bit that we're supposed to scan + */ + } + saw_free_blocks = 0; + + while (curAllocBlock < last_bitmap_block) { + u_int32_t bit; + + /* Update the summary table as needed */ + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("hfs_alloc_scan_range: no summary table!"); + } + } + + uint32_t temp_summary; + error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock); + } + return EINVAL; + } + + if (ALLOC_DEBUG) { + if (temp_summary < summary_bit) { + panic ("hfs_alloc_scan_range: backwards summary bit?\n"); + } + } + + /* + * If temp_summary is greater than summary_bit, then this + * means that the next allocation block crosses a vcbVBMIOSize boundary + * and we should treat this range of on-disk data as part of a new summary + * bit. + */ + if (temp_summary > summary_bit) { + if (saw_free_blocks == 0) { + /* Mark the bit as totally consumed in the summary table */ + hfs_set_summary (hfsmp, summary_bit, 1); + } + else { + /* Mark the bit as potentially free in summary table */ + hfs_set_summary (hfsmp, summary_bit, 0); + } + last_marked = summary_bit; + /* + * Any time we set the summary table, update our counter which tracks + * what the last bit that was fully marked in the summary table. + * + * Then reset our marker which says we haven't seen a free bit yet. + */ + saw_free_blocks = 0; + summary_bit = temp_summary; + } + } /* End summary table conditions */ + + current_word = SWAP_BE32(buffer[word_index]); + /* Iterate through the word 1 bit at a time... 
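+ * Each pass below tests (current_word & (kHighBitInWordMask >> bit)):
+ * bit 0 checks the most significant bit, 0x80000000, and bit 31 the least
+ * significant bit, with curAllocBlock advancing in lock step so that it
+ * always names the allocation block being tested.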
*/ + for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) { + if (curAllocBlock >= last_bitmap_block) { + break; + } + u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit)); + + if (allocated) { + if (size != 0) { + if (readwrite) { + /* Insert the previously tracked range of free blocks to the trim list */ + hfs_track_unmap_blocks (hfsmp, free_offset, size, list); + } + add_free_extent_cache (hfsmp, free_offset, size); + size = 0; + free_offset = 0; + } + } + else { + /* Not allocated */ + size++; + if (free_offset == 0) { + /* Start a new run of free spcae at curAllocBlock */ + free_offset = curAllocBlock; + } + if (saw_free_blocks == 0) { + saw_free_blocks = 1; + } + } + } /* end for loop iterating through the word */ + + if (curAllocBlock < last_bitmap_block) { + word_index++; + } + + } /* End while loop (iterates through last_bitmap_block) */ + + + /* + * We've (potentially) completed our pass through this region of bitmap, + * but one thing we may not have done is updated that last summary bit for + * the last page we scanned, because we would have never transitioned across + * a vcbVBMIOSize boundary again. Check for that and update the last bit + * as needed. + * + * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap + * for the region of bitmap on-disk that we were scanning. (it is one greater). + */ + if ((curAllocBlock >= last_bitmap_block) && + (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t temp_summary; + /* temp_block should be INSIDE the region we just scanned, so subtract 1 */ + uint32_t temp_block = last_bitmap_block - 1; + error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block); + } + return EINVAL; + } + + /* Did we already update this in the table? */ + if (temp_summary > last_marked) { + if (saw_free_blocks == 0) { + hfs_set_summary (hfsmp, temp_summary, 1); + } + else { + hfs_set_summary (hfsmp, temp_summary, 0); + } + } + } + + /* + * We may have been tracking a range of free blocks that hasn't been inserted yet. + * Keep the logic for the TRIM and free extent separate from that of the summary + * table management even though they are closely linked. + */ + if (size != 0) { + if (readwrite) { + hfs_track_unmap_blocks (hfsmp, free_offset, size, list); + } + add_free_extent_cache (hfsmp, free_offset, size); + } + + /* + * curAllocBlock represents the next block we need to scan when we return + * to this function. + */ + *bitToScan = curAllocBlock; + ReleaseScanBitmapRange(blockRef); + + return 0; + +} + + + +/* + * Compute the maximum I/O size to generate against the bitmap file + * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap. + * + * Inputs: + * hfsmp -- hfsmount to look at + * bitmap_off -- bit offset into the bitmap file + * + * Outputs: + * iosize -- iosize to generate. + * + * Returns: + * 0 on success; EINVAL otherwise + */ +static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) { + + /* + * The maximum bitmap size is 512MB regardless of ABN size, so we can get away + * with 32 bit math in this function. + */ + + uint32_t bitmap_len; + uint32_t remaining_bitmap; + uint32_t target_iosize; + uint32_t bitmap_off; + + /* Is this bit index not word aligned? If so, immediately fail. 
*/ + if (bitmap_st % kBitsPerWord) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st); + } + return EINVAL; + } + + /* bitmap_off is in bytes, not allocation blocks/bits */ + bitmap_off = bitmap_st / kBitsPerByte; + + if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off); + } + return EINVAL; + } + + /* + * Also invalid if it's not at least aligned to HFS bitmap logical + * block boundaries. We don't have to emit an iosize that's an + * exact multiple of the VBMIOSize, but it must start on such + * a boundary. + * + * The vcbVBMIOSize may be SMALLER than the allocation block size + * on a FS with giant allocation blocks, but it will never be + * greater than it, so it should be safe to start I/O + * aligned on a VBMIOsize boundary. + */ + if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off); + } + return EINVAL; + } + + /* + * Generate the total bitmap file length in bytes, then round up + * that value to the end of the last allocation block, if needed (It + * will probably be needed). We won't scan past the last actual + * allocation block. + * + * Unless we're completing the bitmap scan (or bitmap < 1MB), we + * have to complete the I/O on VBMIOSize boundaries, but we can only read + * up until the end of the bitmap file. + */ + bitmap_len = roundup(hfsmp->totalBlocks, hfsmp->blockSize * 8) / 8; + + remaining_bitmap = bitmap_len - bitmap_off; + + /* + * io size is the MIN of the maximum I/O we can generate or the + * remaining amount of bitmap. + */ + target_iosize = MIN((MAXBSIZE), remaining_bitmap); + *iosize = target_iosize; + + return 0; +} + + + + +/* + * This function is basically the same as hfs_isallocated, except it's designed for + * use with the red-black tree validation code. It assumes we're only checking whether + * one bit is active, and that we're going to pass in the buf to use, since GenerateTree + * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation. + * + * This should not be called in general purpose scanning code. + */ +int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) { + + u_int32_t *currentWord; // Pointer to current word within bitmap block + u_int32_t bitMask; // Word with given bits already set (ready to test) + u_int32_t firstBit; // Bit index within word of first bit to allocate + u_int32_t numBits; // Number of bits in word to allocate + u_int32_t bitsPerBlock; + uintptr_t blockRef = 0; + u_int32_t numBlocks = 1; + u_int32_t *buffer = NULL; + + int inuse = 0; + int error; + + + if (bp_buf) { + /* just use passed-in buffer if avail. */ + buffer = bp_buf; + } + else { + /* + * Pre-read the bitmap block containing the first word of allocation + */ + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef, + HFS_ALLOC_IGNORE_TENTATIVE); + if (error) + return (error); + } + + /* + * Initialize currentWord, and wordsLeft. + */ + u_int32_t wordIndexInBlock; + + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; + + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + currentWord = buffer + wordIndexInBlock; + + /* + * First test any non word aligned bits. 
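+ *
+ * Worked example (hypothetical alignment): if startingBlock % 32 == 5, then
+ * firstBit is 5, numBits is clipped to 1 (numBlocks is 1 here), and the mask
+ * collapses to (kAllBitsSetInWord >> 5) & ~(kAllBitsSetInWord >> 6), i.e.
+ * 0x04000000, exactly the one bit that describes startingBlock.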
+ */ + firstBit = startingBlock % kBitsPerWord; + bitMask = kAllBitsSetInWord >> firstBit; + numBits = kBitsPerWord - firstBit; + if (numBits > numBlocks) { + numBits = numBlocks; + bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); + } + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + inuse = 1; + goto Exit; + } + ++currentWord; + +Exit: + if(bp_buf == NULL) { + if (buffer) { + (void)ReleaseBitmapBlock(hfsmp, blockRef, false); + } + } + return (inuse); + + + +} + +/* + * This function resets all of the data structures relevant to the + * free extent cache stored in the hfsmount struct. + * + * If we are using the red-black tree code then we need to account for the fact that + * we may encounter situations where we need to jettison the tree. If that is the + * case, then we fail-over to the bitmap scanning logic, but we need to ensure that + * the free ext cache is zeroed before we start using it. + * + * We also reset and disable the cache when allocLimit is updated... which + * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()). + * It is independent of the type of allocator being used currently. + */ +void ResetVCBFreeExtCache(struct hfsmount *hfsmp) +{ + int bytes; + void *freeExt; + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0); + + lck_spin_lock(&hfsmp->vcbFreeExtLock); + + /* reset Free Extent Count */ + hfsmp->vcbFreeExtCnt = 0; + + /* reset the actual array */ + bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor); + freeExt = (void*)(hfsmp->vcbFreeExt); + + bzero (freeExt, bytes); + + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0); + + return; +} + +/* + * This function is used to inform the allocator if we have to effectively shrink + * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs. + * + * The bitmap lock must be held when calling this function. This function also modifies the + * allocLimit field in the hfs mount point structure in the general case. + * + * In the shrinking case, we'll have to remove all free extents from the red-black + * tree past the specified offset new_end_block. In the growth case, we'll have to force + * a re-scan of the new allocation blocks from our current allocLimit to the new end block. + * + * new_end_block represents the total number of blocks available for allocation in the resized + * filesystem. Block #new_end_block should not be allocatable in the resized filesystem since it + * will be out of the (0, n-1) range that are indexable in the bitmap. + * + * Returns 0 on success + * errno on failure + */ +u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) { + + /* + * Update allocLimit to the argument specified + */ + hfsmp->allocLimit = new_end_block; + + /* Invalidate the free extent cache completely so that + * it does not have any extents beyond end of current + * volume. + */ + ResetVCBFreeExtCache(hfsmp); + + /* Force a rebuild of the summary table. 
*/ + (void) hfs_rebuild_summary (hfsmp); + + // Delete any tentative ranges that are in the area we're shrinking + struct rl_entry *range, *next_range; + TAILQ_FOREACH_SAFE(range, &hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS], + rl_link, next_range) { + if (rl_overlap(range, new_end_block, RL_INFINITY) != RL_NOOVERLAP) + hfs_release_reserved(hfsmp, range, HFS_TENTATIVE_BLOCKS); + } + + return 0; +} + +/* + * Remove an extent from the list of free extents. + * + * This is a low-level routine. It does not handle overlaps or splitting; + * that is the responsibility of the caller. The input extent must exactly + * match an extent already in the list; it will be removed, and any following + * extents in the list will be shifted up. + * + * Inputs: + * startBlock - Start of extent to remove + * blockCount - Number of blocks in extent to remove + * + * Result: + * The index of the extent that was removed. + */ +static void remove_free_extent_list(struct hfsmount *hfsmp, int index) +{ + if (index < 0 || (uint32_t)index >= hfsmp->vcbFreeExtCnt) { + if (ALLOC_DEBUG) + panic("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt); + else + printf("hfs: remove_free_extent_list: %p: index (%d) out of range (0, %u)", hfsmp, index, hfsmp->vcbFreeExtCnt); + return; + } + int shift_count = hfsmp->vcbFreeExtCnt - index - 1; + if (shift_count > 0) { + memmove(&hfsmp->vcbFreeExt[index], &hfsmp->vcbFreeExt[index+1], shift_count * sizeof(hfsmp->vcbFreeExt[0])); + } + hfsmp->vcbFreeExtCnt--; +} + + +/* + * Add an extent to the list of free extents. + * + * This is a low-level routine. It does not handle overlaps or coalescing; + * that is the responsibility of the caller. This routine *does* make + * sure that the extent it is adding is inserted in the correct location. + * If the list is full, this routine will handle either removing the last + * extent in the list to make room for the new extent, or ignoring the + * new extent if it is "worse" than the last extent in the list. + * + * Inputs: + * startBlock - Start of extent to add + * blockCount - Number of blocks in extent to add + * + * Result: + * The index where the extent that was inserted, or kMaxFreeExtents + * if the extent was not inserted (the list was full, and the extent + * being added was "worse" than everything in the list). + */ +static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) +{ + uint32_t i; + + /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */ + if (ALLOC_DEBUG) { + uint32_t endBlock = startBlock + blockCount; + for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { + if (endBlock < hfsmp->vcbFreeExt[i].startBlock || + startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) { + continue; + } + panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d", + hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i); + } + } + + /* Figure out what index the new extent should be inserted at. */ + for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + /* The list is sorted by increasing offset. */ + if (startBlock < hfsmp->vcbFreeExt[i].startBlock) { + break; + } + } else { + /* The list is sorted by decreasing size. 
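+ * (Hypothetical example: with cached extent sizes {100, 60, 20}, a new
+ * 50-block extent fails this test at i == 0 and i == 1 and breaks at
+ * i == 2, so it is inserted ahead of the 20-block entry.)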
*/ + if (blockCount > hfsmp->vcbFreeExt[i].blockCount) { + break; + } + } + } + + /* When we get here, i is the index where the extent should be inserted. */ + if (i == kMaxFreeExtents) { + /* + * The new extent is worse than anything already in the list, + * and the list is full, so just ignore the extent to be added. + */ + return i; + } + + /* + * Grow the list (if possible) to make room for an insert. + */ + if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) + hfsmp->vcbFreeExtCnt++; + + /* + * If we'll be keeping any extents after the insert position, then shift them. + */ + int shift_count = hfsmp->vcbFreeExtCnt - i - 1; + if (shift_count > 0) { + memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0])); + } + + /* Finally, store the new extent at its correct position. */ + hfsmp->vcbFreeExt[i].startBlock = startBlock; + hfsmp->vcbFreeExt[i].blockCount = blockCount; + return i; +} + + +/* + * Remove an entry from free extent cache after it has been allocated. + * + * This is a high-level routine. It handles removing a portion of a + * cached extent, potentially splitting it into two (if the cache was + * already full, throwing away the extent that would sort last). It + * also handles removing an extent that overlaps multiple extents in + * the cache. + * + * Inputs: + * hfsmp - mount point structure + * startBlock - starting block of the extent to be removed. + * blockCount - number of blocks of the extent to be removed. + */ +static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) +{ + u_int32_t i, insertedIndex; + u_int32_t currentStart, currentEnd, endBlock; + int extentsRemoved = 0; + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); + + endBlock = startBlock + blockCount; + + lck_spin_lock(&hfsmp->vcbFreeExtLock); + + /* + * Iterate over all of the extents in the free extent cache, removing or + * updating any entries that overlap with the input extent. + */ + for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { + currentStart = hfsmp->vcbFreeExt[i].startBlock; + currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount; + + /* + * If the current extent is entirely before or entirely after the + * the extent to be removed, then we keep it as-is. + */ + if (currentEnd <= startBlock || currentStart >= endBlock) { + continue; + } + + /* + * If the extent being removed entirely contains the current extent, + * then remove the current extent. + */ + if (startBlock <= currentStart && endBlock >= currentEnd) { + remove_free_extent_list(hfsmp, i); + + /* + * We just removed the extent at index i. The extent at + * index i+1 just got shifted to index i. So decrement i + * to undo the loop's "++i", and the next iteration will + * examine index i again, which contains the next extent + * in the list. + */ + --i; + ++extentsRemoved; + continue; + } + + /* + * If the extent being removed is strictly "in the middle" of the + * current extent, then we need to split the current extent into + * two discontiguous extents (the "head" and "tail"). The good + * news is that we don't need to examine any other extents in + * the list. 
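+ * (Hypothetical example: removing blocks 120..129 from a cached extent
+ * covering blocks 100..149 drops that entry and re-adds the head
+ * 100..119 and the tail 130..149 as two separate cache entries.)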
+ */ + if (startBlock > currentStart && endBlock < currentEnd) { + remove_free_extent_list(hfsmp, i); + add_free_extent_list(hfsmp, currentStart, startBlock - currentStart); + add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock); + break; + } + + /* + * The only remaining possibility is that the extent to be removed + * overlaps the start or end (but not both!) of the current extent. + * So we need to replace the current extent with a shorter one. + * + * The only tricky part is that the updated extent might be at a + * different index than the original extent. If the updated extent + * was inserted after the current extent, then we need to re-examine + * the entry at index i, since it now contains the extent that was + * previously at index i+1. If the updated extent was inserted + * before or at the same index as the removed extent, then the + * following extents haven't changed position. + */ + remove_free_extent_list(hfsmp, i); + if (startBlock > currentStart) { + /* Remove the tail of the current extent. */ + insertedIndex = add_free_extent_list(hfsmp, currentStart, startBlock - currentStart); + } else { + /* Remove the head of the current extent. */ + insertedIndex = add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock); + } + if (insertedIndex > i) { + --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */ + } + } + + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + + sanity_check_free_ext(hfsmp, 0); + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0); + + return; +} + + +/* + * Add an entry to free extent cache after it has been deallocated. + * + * This is a high-level routine. It will merge overlapping or contiguous + * extents into a single, larger extent. + * + * If the extent provided has blocks beyond current allocLimit, it is + * clipped to allocLimit (so that we won't accidentally find and allocate + * space beyond allocLimit). + * + * Inputs: + * hfsmp - mount point structure + * startBlock - starting block of the extent to be removed. + * blockCount - number of blocks of the extent to be removed. + * + * Returns: + * true - if the extent was added successfully to the list + * false - if the extent was not added to the list, maybe because + * the extent was beyond allocLimit, or is not best + * candidate to be put in the cache. + */ +static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) +{ + Boolean retval = false; + uint32_t endBlock; + uint32_t currentEnd; + uint32_t i; + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); + +#if DEBUG + for (i = 0; i < 2; ++i) { + struct rl_entry *range; + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[i], rl_link) { + hfs_assert(rl_overlap(range, startBlock, + startBlock + blockCount - 1) == RL_NOOVERLAP); + } + } +#endif + + /* No need to add extent that is beyond current allocLimit */ + if (startBlock >= hfsmp->allocLimit) { + goto out_not_locked; + } + + /* If end of the free extent is beyond current allocLimit, clip the extent */ + if ((startBlock + blockCount) > hfsmp->allocLimit) { + blockCount = hfsmp->allocLimit - startBlock; + } + + lck_spin_lock(&hfsmp->vcbFreeExtLock); + + /* + * Make a pass through the free extent cache, looking for known extents that + * overlap or are contiguous with the extent to be added. 
We'll remove those + * extents from the cache, and incorporate them into the new extent to be added. + */ + endBlock = startBlock + blockCount; + for (i=0; i < hfsmp->vcbFreeExtCnt; ++i) { + currentEnd = hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount; + if (hfsmp->vcbFreeExt[i].startBlock > endBlock || currentEnd < startBlock) { + /* Extent i does not overlap and is not contiguous, so keep it. */ + continue; + } else { + /* We need to remove extent i and combine it with the input extent. */ + if (hfsmp->vcbFreeExt[i].startBlock < startBlock) + startBlock = hfsmp->vcbFreeExt[i].startBlock; + if (currentEnd > endBlock) + endBlock = currentEnd; + + remove_free_extent_list(hfsmp, i); + /* + * We just removed the extent at index i. The extent at + * index i+1 just got shifted to index i. So decrement i + * to undo the loop's "++i", and the next iteration will + * examine index i again, which contains the next extent + * in the list. + */ + --i; + } + } + add_free_extent_list(hfsmp, startBlock, endBlock - startBlock); + + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + +out_not_locked: + sanity_check_free_ext(hfsmp, 0); + + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) + KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0); + + return retval; +} + +/* Debug function to check if the free extent cache is good or not */ +static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) +{ + u_int32_t i, j; + + /* Do not do anything if debug is not on */ + if (ALLOC_DEBUG == 0) { + return; + } + + lck_spin_lock(&hfsmp->vcbFreeExtLock); + + if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents) + panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt); + + /* + * Iterate the Free extent cache and ensure no entries are bogus or refer to + * allocated blocks. + */ + for(i=0; i < hfsmp->vcbFreeExtCnt; i++) { + u_int32_t start, nblocks; + + start = hfsmp->vcbFreeExt[i].startBlock; + nblocks = hfsmp->vcbFreeExt[i].blockCount; + + /* Check if any of the blocks in free extent cache are allocated. + * This should not be enabled always because it might take + * very long for large extents that get added to the list. + * + * We have to drop vcbFreeExtLock while we call hfs_isallocated + * because it is going to do I/O. Note that the free extent + * cache could change. That's a risk we take when using this + * debugging code. (Another alternative would be to try to + * detect when the free extent cache changed, and perhaps + * restart if the list changed while we dropped the lock.) 
+ */ + if (check_allocated) { + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + if (hfs_isallocated(hfsmp, start, nblocks)) { + panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n", + hfsmp, i, start, nblocks); + } + lck_spin_lock(&hfsmp->vcbFreeExtLock); + } + + /* Check if any part of the extent is beyond allocLimit */ + if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) { + panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n", + hfsmp, i, start, nblocks, hfsmp->allocLimit); + } + + /* Check if there are any duplicate start blocks */ + for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) { + if (start == hfsmp->vcbFreeExt[j].startBlock) { + panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", + hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, + hfsmp->vcbFreeExt[j].blockCount); + } + } + + /* Check if the entries are out of order */ + if ((i+1) != hfsmp->vcbFreeExtCnt) { + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + /* sparse devices are sorted by starting block number (ascending) */ + if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) { + panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", + hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, + hfsmp->vcbFreeExt[i+1].blockCount); + } + } else { + /* normally sorted by block count (descending) */ + if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) { + panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", + hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, + hfsmp->vcbFreeExt[i+1].blockCount); + } + } + } + } + lck_spin_unlock(&hfsmp->vcbFreeExtLock); +} + +#define BIT_RIGHT_MASK(bit) (0xffffffffffffffffull >> (bit)) + +static int clzll(uint64_t x) +{ + if (x == 0) + return 64; + else + return __builtin_clzll(x); +} + +#if !HFS_ALLOC_TEST + +static errno_t get_more_bits(bitmap_context_t *bitmap_ctx) +{ + uint32_t start_bit; + uint32_t iosize = 0; + uint32_t byte_offset; + uint32_t last_bitmap_block; + int error; + struct hfsmount *hfsmp = bitmap_ctx->hfsmp; +#if !HFS_ALLOC_TEST + uint64_t lock_elapsed; +#endif + + + if (bitmap_ctx->bp) + ReleaseScanBitmapRange(bitmap_ctx->bp); + + if (msleep(NULL, NULL, PINOD | PCATCH, + "hfs_fsinfo", NULL) == EINTR) { + return EINTR; + } + +#if !HFS_ALLOC_TEST + /* + * Let someone else use the allocation map after we've processed over HFS_FSINFO_MAX_LOCKHELD_TIME . + * lock_start is initialized in hfs_find_free_extents(). + */ + absolutetime_to_nanoseconds(mach_absolute_time() - bitmap_ctx->lock_start, &lock_elapsed); + + if (lock_elapsed >= HFS_FSINFO_MAX_LOCKHELD_TIME) { + + hfs_systemfile_unlock(hfsmp, bitmap_ctx->lockflags); + + /* add tsleep here to force context switch and fairness */ + tsleep((caddr_t)get_more_bits, PRIBIO, "hfs_fsinfo", 1); + + hfs_journal_lock(hfsmp); + + /* Flush the journal and wait for all I/Os to finish up */ + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + if (error) { + hfs_journal_unlock(hfsmp); + return error; + } + + /* + * Take bitmap lock to ensure it is not being modified while journal is still held. + * Since we are reading larger than normal blocks from the bitmap, which + * might confuse other parts of the bitmap code using normal blocks, we + * take exclusive lock here. 
+ */ + bitmap_ctx->lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + bitmap_ctx->lock_start = mach_absolute_time(); + + /* Release the journal lock */ + hfs_journal_unlock(hfsmp); + + /* + * Bitmap is read in large block size (up to 1MB), + * unlike the runtime which reads the bitmap in the + * 4K block size. If the bitmap is read by both ways + * at the same time, it can result in multiple buf_t with + * different sizes and potentially case data corruption. + * To avoid this, we invalidate all the existing buffers + * associated with the bitmap vnode. + */ + error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + if (error) { + /* hfs_systemfile_unlock will be called in the caller */ + return error; + } + } +#endif + + start_bit = bitmap_ctx->run_offset; + + if (start_bit >= bitmap_ctx->hfsmp->totalBlocks) { + bitmap_ctx->chunk_end = 0; + bitmap_ctx->bp = NULL; + bitmap_ctx->bitmap = NULL; + return 0; + } + + hfs_assert(start_bit % 8 == 0); + + /* + * Compute how much I/O we should generate here. + * hfs_scan_range_size will validate that the start bit + * converted into a byte offset into the bitmap file, + * is aligned on a VBMIOSize boundary. + */ + error = hfs_scan_range_size (bitmap_ctx->hfsmp, start_bit, &iosize); + if (error) + return error; + + hfs_assert(iosize != 0); + + /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ + byte_offset = start_bit / kBitsPerByte; + + /* + * When the journal replays blocks, it does so by writing directly to the disk + * device (bypassing any filesystem vnodes and such). When it finishes its I/Os + * it also immediately re-reads and invalidates the range covered by the bp so + * it does not leave anything lingering in the cache (for iosize reasons). + * + * As such, it is safe to do large I/Os here with ReadBitmapRange. + * + * NOTE: It is not recommended, but it is possible to call the function below + * on sections of the bitmap that may be in core already as long as the pages are not + * dirty. In that case, we'd notice that something starting at that + * logical block of the bitmap exists in the metadata cache, and we'd check + * if the iosize requested is the same as what was already allocated for it. + * Odds are pretty good we're going to request something larger. In that case, + * we just free the existing memory associated with the buf and reallocate a + * larger range. This function should immediately invalidate it as soon as we're + * done scanning, so this shouldn't cause any coherency issues. + */ + error = ReadBitmapRange(bitmap_ctx->hfsmp, byte_offset, iosize, (uint32_t **)&bitmap_ctx->bitmap, &bitmap_ctx->bp); + if (error) + return error; + + /* + * At this point, we have a giant wired buffer that represents some portion of + * the bitmap file that we want to analyze. We may not have gotten all 'iosize' + * bytes though, so clip our ending bit to what we actually read in. 
+ */ + last_bitmap_block = start_bit + buf_count(bitmap_ctx->bp) * kBitsPerByte; + + /* Cap the last block to the total number of blocks if required */ + if (last_bitmap_block > bitmap_ctx->hfsmp->totalBlocks) + last_bitmap_block = bitmap_ctx->hfsmp->totalBlocks; + + bitmap_ctx->chunk_current = 0; // new chunk of bitmap + bitmap_ctx->chunk_end = last_bitmap_block - start_bit; + + return 0; +} + +#endif // !HFS_ALLOC_TEST + +// Returns number of contiguous bits set at start +static int bit_count_set(void *bitmap, int start, int end) +{ + if (start == end) + return 0; + + hfs_assert(end > start); + + const int start_bit = start & 63; + const int end_bit = end & 63; + + uint64_t *p = (uint64_t *)bitmap + start / 64; + uint64_t x = ~OSSwapBigToHostInt64(*p); + + if ((start & ~63) == (end & ~63)) { + // Start and end in same 64 bits + x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit); + return clzll(x) - start_bit; + } + + // Deal with initial unaligned bit + x &= BIT_RIGHT_MASK(start_bit); + + if (x) + return clzll(x) - start_bit; + + // Go fast + ++p; + int count = 64 - start_bit; + int nquads = (end - end_bit - start - 1) / 64; + + while (nquads--) { + if (*p != 0xffffffffffffffffull) { + x = ~OSSwapBigToHostInt64(*p); + return count + clzll(x); + } + ++p; + count += 64; + } + + if (end_bit) { + x = ~OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit); + count += clzll(x); + } + + return count; +} + +/* Returns the number of a run of cleared bits: + * bitmap is a single chunk of memory being examined + * start: the start bit relative to the current buffer to be examined; start is inclusive. + * end: the end bit relative to the current buffer to be examined; end is not inclusive. + */ +static int bit_count_clr(void *bitmap, int start, int end) +{ + if (start == end) + return 0; + + hfs_assert(end > start); + + const int start_bit = start & 63; + const int end_bit = end & 63; + + uint64_t *p = (uint64_t *)bitmap + start / 64; + uint64_t x = OSSwapBigToHostInt64(*p); + + if ((start & ~63) == (end & ~63)) { + // Start and end in same 64 bits + x = (x & BIT_RIGHT_MASK(start_bit)) | BIT_RIGHT_MASK(end_bit); + + return clzll(x) - start_bit; + } + + // Deal with initial unaligned bit + x &= BIT_RIGHT_MASK(start_bit); + + if (x) + return clzll(x) - start_bit; + + // Go fast + ++p; + int count = 64 - start_bit; + int nquads = (end - end_bit - start - 1) / 64; + + while (nquads--) { + if (*p) { + x = OSSwapBigToHostInt64(*p); + return count + clzll(x); + } + ++p; + count += 64; + } + + if (end_bit) { + x = OSSwapBigToHostInt64(*p) | BIT_RIGHT_MASK(end_bit); + + count += clzll(x); + } + + return count; +} + +#if !HFS_ALLOC_TEST +static errno_t update_summary_table(bitmap_context_t *bitmap_ctx, uint32_t start, uint32_t count, bool set) +{ + uint32_t end, start_summary_bit, end_summary_bit; + errno_t error = 0; + + if (count == 0) + goto out; + + if (!ISSET(bitmap_ctx->hfsmp->hfs_flags, HFS_SUMMARY_TABLE)) + return 0; + + if (hfs_get_summary_index (bitmap_ctx->hfsmp, start, &start_summary_bit)) { + error = EINVAL; + goto out; + } + + end = start + count - 1; + if (hfs_get_summary_index (bitmap_ctx->hfsmp, end, &end_summary_bit)) { + error = EINVAL; + goto out; + } + + // if summary table bit has been updated with free block previously, leave it. 
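+ // Editor's note, added for exposition (not part of the original change):
+ // last_free_summary_bit covers the tail of the preceding free run. If this
+ // allocated run begins in that same summary chunk, the chunk still contains
+ // free blocks, so its summary bit is skipped here instead of being
+ // overwritten on behalf of the allocated run.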
+ if ((start_summary_bit == bitmap_ctx->last_free_summary_bit) && set) + start_summary_bit++; + + for (uint32_t summary_bit = start_summary_bit; summary_bit <= end_summary_bit; summary_bit++) + hfs_set_summary (bitmap_ctx->hfsmp, summary_bit, set); + + if (!set) + bitmap_ctx->last_free_summary_bit = end_summary_bit; + +out: + return error; + +} +#endif //!HFS_ALLOC_TEST + +/* + * Read in chunks of the bitmap into memory, and find a run of cleared/set bits; + * the run can extend across chunk boundaries. + * bit_count_clr can be passed to get a run of cleared bits. + * bit_count_set can be passed to get a run of set bits. + */ +static errno_t hfs_bit_count(bitmap_context_t *bitmap_ctx, int (*fn)(void *, int ,int), uint32_t *bit_count) +{ + int count; + errno_t error = 0; + + *bit_count = 0; + + do { + if (bitmap_ctx->run_offset == 0 || bitmap_ctx->chunk_current == bitmap_ctx->chunk_end) { + if ((error = get_more_bits(bitmap_ctx)) != 0) + goto out; + } + + if (bitmap_ctx->chunk_end == 0) + break; + + count = fn(bitmap_ctx->bitmap, bitmap_ctx->chunk_current, bitmap_ctx->chunk_end); + + bitmap_ctx->run_offset += count; + bitmap_ctx->chunk_current += count; + *bit_count += count; + + } while (bitmap_ctx->chunk_current >= bitmap_ctx->chunk_end && count); + +out: + return error; + +} + +// Returns count of number of bits clear +static errno_t hfs_bit_count_clr(bitmap_context_t *bitmap_ctx, uint32_t *count) +{ + return hfs_bit_count(bitmap_ctx, bit_count_clr, count); +} + +// Returns count of number of bits set +static errno_t hfs_bit_count_set(bitmap_context_t *bitmap_ctx, uint32_t *count) +{ + return hfs_bit_count(bitmap_ctx, bit_count_set, count); +} + +static uint32_t hfs_bit_offset(bitmap_context_t *bitmap_ctx) +{ + return bitmap_ctx->run_offset; +} + +/* + * Perform a full scan of the bitmap file. + * Note: during the scan of bitmap file, it may drop and reacquire the + * bitmap lock to let someone else use the bitmap for fairness. + * Currently it is used by HFS_GET_FSINFO statistic gathing, which + * is run while other processes might perform HFS operations. + */ + +errno_t hfs_find_free_extents(struct hfsmount *hfsmp, + void (*callback)(void *data, off_t free_extent_size), void *callback_arg) +{ + struct bitmap_context bitmap_ctx; + uint32_t count; + errno_t error = 0; + + if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { + error = hfs_init_summary(hfsmp); + if (error) + return error; + } + + bzero(&bitmap_ctx, sizeof(struct bitmap_context)); + + /* + * The journal maintains list of recently deallocated blocks to + * issue DKIOCUNMAPs when the corresponding journal transaction is + * flushed to the disk. To avoid any race conditions, we only + * want one active trim list. Therefore we make sure that the + * journal trim list is sync'ed, empty, and not modifiable for + * the duration of our scan. + * + * Take the journal lock before flushing the journal to the disk. + * We will keep on holding the journal lock till we don't get the + * bitmap lock to make sure that no new journal transactions can + * start. This will make sure that the journal trim list is not + * modified after the journal flush and before getting bitmap lock. + * We can release the journal lock after we acquire the bitmap + * lock as it will prevent any further block deallocations. 
+ */ + hfs_journal_lock(hfsmp); + + /* Flush the journal and wait for all I/Os to finish up */ + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + if (error) { + hfs_journal_unlock(hfsmp); + return error; + } + + /* + * Take bitmap lock to ensure it is not being modified. + * Since we are reading larger than normal blocks from the bitmap, which + * might confuse other parts of the bitmap code using normal blocks, we + * take exclusive lock here. + */ + bitmap_ctx.lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + +#if !HFS_ALLOC_TEST + bitmap_ctx.lock_start = mach_absolute_time(); +#endif + + /* Release the journal lock */ + hfs_journal_unlock(hfsmp); + + /* + * Bitmap is read in large block size (up to 1MB), + * unlike the runtime which reads the bitmap in the + * 4K block size. If the bitmap is read by both ways + * at the same time, it can result in multiple buf_t with + * different sizes and potentially case data corruption. + * To avoid this, we invalidate all the existing buffers + * associated with the bitmap vnode. + */ + error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + if (error) + goto out; + + /* + * Get the list of all free extent ranges. hfs_alloc_scan_range() + * will call hfs_fsinfo_data_add() to account for all the free + * extent ranges found during scan. + */ + bitmap_ctx.hfsmp = hfsmp; + bitmap_ctx.run_offset = 0; + + while (bitmap_ctx.run_offset < hfsmp->totalBlocks) { + + uint32_t start = hfs_bit_offset(&bitmap_ctx); + + if ((error = hfs_bit_count_clr(&bitmap_ctx, &count)) != 0) + goto out; + + if (count) + callback(callback_arg, hfs_blk_to_bytes(count, hfsmp->blockSize)); + + if ((error = update_summary_table(&bitmap_ctx, start, count, false)) != 0) + goto out; + + start = hfs_bit_offset(&bitmap_ctx); + + if ((error = hfs_bit_count_set(&bitmap_ctx, &count)) != 0) + goto out; + + if ((error = update_summary_table(&bitmap_ctx, start, count, true)) != 0) + goto out; + } + +out: + if (bitmap_ctx.lockflags) { + hfs_systemfile_unlock(hfsmp, bitmap_ctx.lockflags); + } + + return error; +} + diff --git a/core/hfs.h b/core/hfs.h new file mode 100644 index 0000000..786199c --- /dev/null +++ b/core/hfs.h @@ -0,0 +1,1171 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __HFS__ +#define __HFS__ + +/* If set to 1, enables the code to allocate blocks from the start + * of the disk instead of the nextAllocation for sparse devices like + * sparse disk images or sparsebundle images. The free extent cache + * for such volumes is also maintained based on the start block instead + * of number of contiguous allocation blocks. These devices prefer + * allocation of blocks near the start of the disk to avoid the + * increasing the image size, but it can also result in file fragmentation. + */ +#define HFS_SPARSE_DEV 1 + +#if DEBUG +#define HFS_CHECK_LOCK_ORDER 1 +#endif + +#define HFS_TMPDBG 0 + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../hfs_encodings/hfs_encodings.h" + +#include "hfs_journal.h" +#include "hfs_format.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_macos_defs.h" +#include "hfs_hotfiles.h" +#include "hfs_fsctl.h" + +__BEGIN_DECLS + +#if CONFIG_PROTECT +/* Forward declare the cprotect struct */ +struct cprotect; +#endif + +/* + * Just reported via MIG interface. + */ +#define VERSION_STRING "hfs-2 (4-12-99)" + +#define HFS_LINK_MAX 32767 + +#define HFS_MAX_DEFERED_ALLOC (1024*1024) + +#define HFS_MAX_FILES (UINT32_MAX - kHFSFirstUserCatalogNodeID) + +// 400 megs is a "big" file (i.e. one that when deleted +// would touch enough data that we should break it into +// multiple separate transactions) +#define HFS_BIGFILE_SIZE (400LL * 1024LL * 1024LL) + +enum { kMDBSize = 512 }; /* Size of I/O transfer to read entire MDB */ + +enum { kMasterDirectoryBlock = 2 }; /* MDB offset on disk in 512-byte blocks */ +enum { kMDBOffset = kMasterDirectoryBlock * 512 }; /* MDB offset on disk in bytes */ + +#define kRootDirID kHFSRootFolderID + + +/* number of locked buffer caches to hold for b-tree meta data */ +#define kMaxLockedMetaBuffers 32 + +extern struct timezone gTimeZone; + + +/* How many free extents to cache per volume */ +#define kMaxFreeExtents 10 + +/* Maximum file size that we're willing to defrag on open */ +#define HFS_MAX_DEFRAG_SIZE 104857600 // 100 * 1024 * 1024 (100MB) +#define HFS_INITIAL_DEFRAG_SIZE 20971520 // 20 * 1024 * 1024 (20MB) + + +/* The maximum time hfs locks can be held while performing hfs statistics gathering */ +#define HFS_FSINFO_MAX_LOCKHELD_TIME 20 * 1000000ULL /* at most 20 milliseconds. */ + +/* + * HFS_MINFREE gives the minimum acceptable percentage + * of file system blocks which may be free (but this + * minimum will never exceed HFS_MAXRESERVE bytes). If + * the free block count drops below this level only the + * superuser may continue to allocate blocks. + */ +#define HFS_MINFREE 1 +#define HFS_MAXRESERVE ((u_int64_t)(250*1024*1024)) +#define HFS_BT_MAXRESERVE ((u_int64_t)(10*1024*1024)) + +/* + * The system distinguishes between the desirable low-disk + * notifiaction levels for root volumes and non-root volumes. 
+ * The various thresholds are computed as a fraction of the + * volume size, all capped at a certain fixed level + */ + +#define HFS_ROOTVERYLOWDISKTRIGGERFRACTION 5 +#define HFS_ROOTVERYLOWDISKTRIGGERLEVEL ((u_int64_t)(512*1024*1024)) +#define HFS_ROOTLOWDISKTRIGGERFRACTION 10 +#define HFS_ROOTLOWDISKTRIGGERLEVEL ((u_int64_t)(1024*1024*1024)) +#define HFS_ROOTNEARLOWDISKTRIGGERFRACTION 10.5 +#define HFS_ROOTNEARLOWDISKTRIGGERLEVEL ((u_int64_t)(1024*1024*1024 + 100*1024*1024)) +#define HFS_ROOTLOWDISKSHUTOFFFRACTION 11 +#define HFS_ROOTLOWDISKSHUTOFFLEVEL ((u_int64_t)(1024*1024*1024 + 250*1024*1024)) + +#define HFS_VERYLOWDISKTRIGGERFRACTION 1 +#define HFS_VERYLOWDISKTRIGGERLEVEL ((u_int64_t)(150*1024*1024)) +#define HFS_LOWDISKTRIGGERFRACTION 2 +#define HFS_LOWDISKTRIGGERLEVEL ((u_int64_t)(500*1024*1024)) +#define HFS_NEARLOWDISKTRIGGERFRACTION 10 +#define HFS_NEARLOWDISKTRIGGERLEVEL ((uint64_t)(1024*1024*1024)) +#define HFS_LOWDISKSHUTOFFFRACTION 12 +#define HFS_LOWDISKSHUTOFFLEVEL ((u_int64_t)(1024*1024*1024 + 200*1024*1024)) + +/* Internal Data structures*/ + +/* This structure describes the HFS specific mount structure data. */ +typedef struct hfsmount { + u_int32_t hfs_flags; /* see below */ + + /* Physical Description */ + u_int32_t hfs_logical_block_size; /* Logical block size of the disk as reported by ioctl(DKIOCGETBLOCKSIZE), always a multiple of 512 */ + daddr64_t hfs_logical_block_count; /* Number of logical blocks on the disk, as reported by ioctl(DKIOCGETBLOCKCOUNT) */ + u_int64_t hfs_logical_bytes; /* Number of bytes on the disk device this HFS is mounted on (blockcount * blocksize) */ + /* + * Regarding the two AVH sector fields below: + * Under normal circumstances, the filesystem's notion of the "right" location for the AVH is such that + * the partition and filesystem's are in sync. However, during a filesystem resize, HFS proactively + * writes a new AVH at the end of the filesystem, assuming that the partition will be resized accordingly. + * + * However, it is not technically a corruption if the partition size is never modified. As a result, we need + * to keep two copies of the AVH around "just in case" the partition size is not modified. 
+ */ + daddr64_t hfs_partition_avh_sector; /* location of Alt VH w.r.t partition size */ + daddr64_t hfs_fs_avh_sector; /* location of Alt VH w.r.t filesystem size */ + + u_int32_t hfs_physical_block_size; /* Physical block size of the disk as reported by ioctl(DKIOCGETPHYSICALBLOCKSIZE) */ + u_int32_t hfs_log_per_phys; /* Number of logical blocks per physical block size */ + + /* Access to VFS and devices */ + struct mount *hfs_mp; /* filesystem vfs structure */ + struct vnode *hfs_devvp; /* block device mounted vnode */ + struct vnode * hfs_extents_vp; + struct vnode * hfs_catalog_vp; + struct vnode * hfs_allocation_vp; + struct vnode * hfs_attribute_vp; + struct vnode * hfs_startup_vp; + struct vnode * hfs_attrdata_vp; /* pseudo file */ + struct cnode * hfs_extents_cp; + struct cnode * hfs_catalog_cp; + struct cnode * hfs_allocation_cp; + struct cnode * hfs_attribute_cp; + struct cnode * hfs_startup_cp; + dev_t hfs_raw_dev; /* device mounted */ + u_int32_t hfs_logBlockSize; /* Size of buffer cache buffer for I/O */ + + /* Default values for HFS standard and non-init access */ + uid_t hfs_uid; /* uid to set as owner of the files */ + gid_t hfs_gid; /* gid to set as owner of the files */ + mode_t hfs_dir_mask; /* mask to and with directory protection bits */ + mode_t hfs_file_mask; /* mask to and with file protection bits */ + u_int32_t hfs_encoding; /* Default encoding for non hfs+ volumes */ + + /* Persistent fields (on disk, dynamic) */ + time_t hfs_mtime; /* file system last modification time */ + u_int32_t hfs_filecount; /* number of files in file system */ + u_int32_t hfs_dircount; /* number of directories in file system */ + u_int32_t freeBlocks; /* free allocation blocks */ + u_int32_t reclaimBlocks; /* number of blocks we are reclaiming during resize */ + u_int32_t tentativeBlocks; /* tentative allocation blocks -- see note below */ + u_int32_t nextAllocation; /* start of next allocation search */ + u_int32_t sparseAllocation; /* start of allocations for sparse devices */ + u_int32_t vcbNxtCNID; /* next unused catalog node ID - protected by catalog lock */ + u_int32_t vcbWrCnt; /* file system write count */ + u_int64_t encodingsBitmap; /* in-use encodings */ + u_int16_t vcbNmFls; /* HFS Only - root dir file count */ + u_int16_t vcbNmRtDirs; /* HFS Only - root dir directory count */ + + /* Persistent fields (on disk, static) */ + u_int16_t vcbSigWord; + + // Volume will be inconsistent if header is not flushed + bool hfs_header_dirty; + + // Volume header is dirty, but won't be inconsistent if not flushed + bool hfs_header_minor_change; + + u_int32_t vcbAtrb; + u_int32_t vcbJinfoBlock; + u_int32_t localCreateDate;/* volume create time from volume header (For HFS+, value is in local time) */ + time_t hfs_itime; /* file system creation time (creation date of the root folder) */ + time_t hfs_btime; /* file system last backup time */ + u_int32_t blockSize; /* size of allocation blocks */ + u_int32_t totalBlocks; /* total allocation blocks */ + u_int32_t allocLimit; /* Do not allocate this block or beyond */ + /* + * NOTE: When resizing a volume to make it smaller, allocLimit is set to the allocation + * block number which will contain the new alternate volume header. At all other times, + * allocLimit is set to totalBlocks. The allocation code uses allocLimit instead of + * totalBlocks to limit which blocks may be allocated, so that during a resize, we don't + * put new content into the blocks we're trying to truncate away. 
+ */ + int32_t vcbClpSiz; + u_int32_t vcbFndrInfo[8]; + int16_t vcbVBMSt; /* HFS only */ + int16_t vcbAlBlSt; /* HFS only */ + + /* vcb stuff */ + u_int8_t vcbVN[256]; /* volume name in UTF-8 */ + u_int32_t volumeNameEncodingHint; + u_int32_t hfsPlusIOPosOffset; /* Disk block where HFS+ starts */ + u_int32_t vcbVBMIOSize; /* volume bitmap I/O size */ + + /* cache of largest known free extents */ + u_int32_t vcbFreeExtCnt; + HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents]; + lck_spin_t vcbFreeExtLock; + + /* Summary Table */ + u_int8_t *hfs_summary_table; /* Each bit is 1 vcbVBMIOSize of bitmap, byte indexed */ + u_int32_t hfs_summary_size; /* number of BITS in summary table defined above (not bytes!) */ + u_int32_t hfs_summary_bytes; /* number of BYTES in summary table */ + + u_int32_t scan_var; /* For initializing the summary table */ + + + u_int32_t reserveBlocks; /* free block reserve */ + u_int32_t loanedBlocks; /* blocks on loan for delayed allocations */ + u_int32_t lockedBlocks; /* blocks reserved and locked */ + + /* + * HFS+ Private system directories (two). Any access + * (besides looking at the cd_cnid) requires holding + * the Catalog File lock. + */ + struct cat_desc hfs_private_desc[2]; + struct cat_attr hfs_private_attr[2]; + + u_int32_t hfs_metadata_createdate; +#if CONFIG_HFS_STD + hfs_to_unicode_func_t hfs_get_unicode; + unicode_to_hfs_func_t hfs_get_hfsname; +#endif + + /* Quota variables: */ + struct quotafile hfs_qfiles[MAXQUOTAS]; /* quota files */ + + /* Journaling variables: */ + struct journal *jnl; // the journal for this volume (if one exists) + struct vnode *jvp; // device where the journal lives (may be equal to devvp) + u_int32_t jnl_start; // start block of the journal file (so we don't delete it) + u_int32_t jnl_size; + u_int32_t hfs_jnlfileid; + u_int32_t hfs_jnlinfoblkid; + lck_rw_t hfs_global_lock; + thread_t hfs_global_lockowner; + u_int32_t hfs_transaction_nesting; + + /* + * Notification variables + * See comments in hfs mount code for what the + * default levels are set to. + */ + u_int32_t hfs_notification_conditions; + u_int32_t hfs_freespace_notify_dangerlimit; + u_int32_t hfs_freespace_notify_warninglimit; + u_int32_t hfs_freespace_notify_nearwarninglimit; + u_int32_t hfs_freespace_notify_desiredlevel; + + /* time mounted and last mounted mod time "snapshot" */ + time_t hfs_mount_time; + time_t hfs_last_mounted_mtime; + + /* Metadata allocation zone variables: */ + u_int32_t hfs_metazone_start; + u_int32_t hfs_metazone_end; + u_int32_t hfs_hotfile_start; + u_int32_t hfs_hotfile_end; + u_int32_t hfs_min_alloc_start; + u_int32_t hfs_freed_block_count; + u_int64_t hfs_cs_hotfile_size; // in bytes + int hfs_hotfile_freeblks; + int hfs_hotfile_blk_adjust; // since we pass this to OSAddAtomic, this needs to be 4-byte aligned + int hfs_hotfile_maxblks; + int hfs_overflow_maxblks; + int hfs_catalog_maxblks; + + /* Hot File Clustering variables: */ + lck_mtx_t hfc_mutex; /* serialize hot file stages */ + enum hfc_stage hfc_stage; /* what are we up to... */ + time_t hfc_timebase; /* recording period start time */ + time_t hfc_timeout; /* recording period stop time */ + struct hotfile_data *hfc_recdata; + struct hotfilelist *hfc_filelist; + uint32_t hfc_maxfiles; /* maximum files to track */ + struct vnode * hfc_filevp; + + /* defrag-on-open variables */ + int hfs_defrag_nowait; //issue defrags now, regardless of whether or not we've gone past 3 min. 
+ uint64_t hfs_defrag_max; //maximum file size we'll defragment on this mount + +#if HFS_SPARSE_DEV + /* Sparse device variables: */ + struct vnode * hfs_backingvp; + u_int32_t hfs_last_backingstatfs; + u_int32_t hfs_sparsebandblks; + u_int64_t hfs_backingfs_maxblocks; +#endif + size_t hfs_max_inline_attrsize; + + lck_mtx_t hfs_mutex; /* protects access to hfsmount data */ + + uint32_t hfs_syncers; // Count of the number of syncers running + enum { + HFS_THAWED, + HFS_WANT_TO_FREEZE, // This state stops hfs_sync from starting + HFS_FREEZING, // We're in this state whilst we're flushing + HFS_FROZEN // Everything gets blocked in hfs_lock_global + } hfs_freeze_state; + union { + /* + * When we're freezing (HFS_FREEZING) but not yet + * frozen (HFS_FROZEN), we record the freezing thread + * so that we stop other threads from taking locks, + * but allow the freezing thread. + */ + const struct thread *hfs_freezing_thread; + /* + * Once we have frozen (HFS_FROZEN), we record the + * process so that if it dies, we can automatically + * unfreeze. + */ + proc_t hfs_freezing_proc; + }; + + thread_t hfs_downgrading_thread; /* thread who's downgrading to rdonly */ + + /* Resize variables: */ + u_int32_t hfs_resize_blocksmoved; + u_int32_t hfs_resize_totalblocks; + u_int32_t hfs_resize_progress; +#if CONFIG_PROTECT + /* Data Protection fields */ + cpx_t hfs_resize_cpx; + u_int16_t hfs_running_cp_major_vers; + uint32_t default_cp_class; /* default effective class value */ + uint64_t cproot_flags; + uint8_t cp_crypto_generation; + cp_lock_state_t hfs_cp_lock_state; /* per-mount device lock state info */ +#if HFS_CONFIG_KEY_ROLL + uint32_t hfs_auto_roll_min_key_os_version; + uint32_t hfs_auto_roll_max_key_os_version; +#endif +#if HFS_TMPDBG +#if !SECURE_KERNEL + boolean_t hfs_cp_verbose; +#endif +#endif + +#endif + + /* the full UUID of the volume, not the one stored in finderinfo */ + uuid_t hfs_full_uuid; + + /* Per mount cnode hash variables: */ + lck_mtx_t hfs_chash_mutex; /* protects access to cnode hash table */ + u_long hfs_cnodehash; /* size of cnode hash table - 1 */ + LIST_HEAD(cnodehashhead, cnode) *hfs_cnodehashtbl; /* base of cnode hash */ + + /* Per mount fileid hash variables (protected by catalog lock!) */ + u_long hfs_idhash; /* size of cnid/fileid hash table -1 */ + LIST_HEAD(idhashhead, cat_preflightid) *hfs_idhashtbl; /* base of ID hash */ + + // Records the oldest outstanding sync request + struct timeval hfs_sync_req_oldest; + + /* Records the syncer thread so that we can avoid the syncer + queing more syncs. */ + thread_t hfs_syncer_thread; + + // Not currently used except for debugging purposes + // Since we pass this to OSAddAtomic, this needs to be 4-byte aligned. + uint32_t hfs_active_threads; + + enum { + // These are indices into the array below + + // Tentative ranges can be claimed back at any time + HFS_TENTATIVE_BLOCKS = 0, + + // Locked ranges cannot be claimed back, but the allocation + // won't have been written to disk yet + HFS_LOCKED_BLOCKS = 1, + }; + // These lists are not sorted like a range list usually is + struct rl_head hfs_reserved_ranges[2]; +} hfsmount_t; + +/* + * HFS_META_DELAY is a duration (in usecs) used for triggering the + * hfs_syncer() routine. We will back off if writes are in + * progress, but... + * HFS_MAX_META_DELAY is the maximum time we will allow the + * syncer to be delayed. 
+ */ +enum { + HFS_META_DELAY = 100 * 1000, // 0.1 secs + HFS_MAX_META_DELAY = 5000 * 1000 // 5 secs +}; + +#define HFS_META_DELAY_TS \ + (struct timespec){ 0, HFS_META_DELAY * NSEC_PER_USEC } + +typedef hfsmount_t ExtendedVCB; + +/* Aliases for legacy (Mac OS 9) field names */ +#define vcbLsMod hfs_mtime +#define vcbVolBkUp hfs_btime +#define extentsRefNum hfs_extents_vp +#define catalogRefNum hfs_catalog_vp +#define allocationsRefNum hfs_allocation_vp +#define vcbFilCnt hfs_filecount +#define vcbDirCnt hfs_dircount + +static inline void MarkVCBDirty(hfsmount_t *hfsmp) +{ + hfsmp->hfs_header_dirty = true; +} + +static inline void MarkVCBClean(hfsmount_t *hfsmp) +{ + hfsmp->hfs_header_dirty = false; + hfsmp->hfs_header_minor_change = false; +} + +static inline bool IsVCBDirty(ExtendedVCB *vcb) +{ + return vcb->hfs_header_minor_change || vcb->hfs_header_dirty; +} + +// Header is changed but won't be inconsistent if we don't write it +static inline void hfs_note_header_minor_change(hfsmount_t *hfsmp) +{ + hfsmp->hfs_header_minor_change = true; +} + +// Must header be flushed for volume to be consistent? +static inline bool hfs_header_needs_flushing(hfsmount_t *hfsmp) +{ + return (hfsmp->hfs_header_dirty + || ISSET(hfsmp->hfs_catalog_cp->c_flag, C_MODIFIED) + || ISSET(hfsmp->hfs_extents_cp->c_flag, C_MODIFIED) + || (hfsmp->hfs_attribute_cp + && ISSET(hfsmp->hfs_attribute_cp->c_flag, C_MODIFIED)) + || (hfsmp->hfs_allocation_cp + && ISSET(hfsmp->hfs_allocation_cp->c_flag, C_MODIFIED)) + || (hfsmp->hfs_startup_cp + && ISSET(hfsmp->hfs_startup_cp->c_flag, C_MODIFIED))); +} + +/* + * There are two private directories in HFS+. + * + * One contains inodes for files that are hardlinked or open/unlinked. + * The other contains inodes for directories that are hardlinked. + */ +enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; + +#define HFS_ALLOCATOR_SCAN_INFLIGHT 0x0001 /* scan started */ +#define HFS_ALLOCATOR_SCAN_COMPLETED 0x0002 /* initial scan was completed */ + +/* HFS mount point flags */ +#define HFS_READ_ONLY 0x00001 +#define HFS_UNKNOWN_PERMS 0x00002 +#define HFS_WRITEABLE_MEDIA 0x00004 +#define HFS_CLEANED_ORPHANS 0x00008 +#define HFS_X 0x00010 +#define HFS_CASE_SENSITIVE 0x00020 +#define HFS_STANDARD 0x00040 +#define HFS_METADATA_ZONE 0x00080 +#define HFS_FRAGMENTED_FREESPACE 0x00100 +#define HFS_NEED_JNL_RESET 0x00200 +#define HFS_HAS_SPARSE_DEVICE 0x00400 +#define HFS_RESIZE_IN_PROGRESS 0x00800 +#define HFS_QUOTAS 0x01000 +#define HFS_CREATING_BTREE 0x02000 +/* When set, do not update nextAllocation in the mount structure */ +#define HFS_SKIP_UPDATE_NEXT_ALLOCATION 0x04000 +/* When set, the file system supports extent-based extended attributes */ +#define HFS_XATTR_EXTENTS 0x08000 +#define HFS_FOLDERCOUNT 0x10000 +/* When set, the file system exists on a virtual device, like disk image */ +#define HFS_VIRTUAL_DEVICE 0x20000 +/* When set, we're in hfs_changefs, so hfs_sync should do nothing. */ +#define HFS_IN_CHANGEFS 0x40000 +/* When set, we are in process of downgrading or have downgraded to read-only, + * so hfs_start_transaction should return EROFS. 
+ */ +#define HFS_RDONLY_DOWNGRADE 0x80000 +#define HFS_DID_CONTIG_SCAN 0x100000 +#define HFS_UNMAP 0x200000 +#define HFS_SSD 0x400000 +#define HFS_SUMMARY_TABLE 0x800000 +#define HFS_CS 0x1000000 +#define HFS_CS_METADATA_PIN 0x2000000 +#define HFS_CS_HOTFILE_PIN 0x4000000 /* cooperative fusion (enables a hotfile variant) */ +#define HFS_FEATURE_BARRIER 0x8000000 /* device supports barrier-only flush */ +#define HFS_CS_SWAPFILE_PIN 0x10000000 +#define HFS_RUN_SYNCER 0x20000000 + +/* Macro to update next allocation block in the HFS mount structure. If + * the HFS_SKIP_UPDATE_NEXT_ALLOCATION is set, do not update + * nextAllocation block. + */ +#define HFS_UPDATE_NEXT_ALLOCATION(hfsmp, new_nextAllocation) \ + { \ + if ((hfsmp->hfs_flags & HFS_SKIP_UPDATE_NEXT_ALLOCATION) == 0)\ + hfsmp->nextAllocation = new_nextAllocation; \ + } \ + +/* Macro for incrementing and decrementing the folder count in a cnode + * attribute only if the HFS_FOLDERCOUNT bit is set in the mount flags + * and kHFSHasFolderCount bit is set in the cnode flags. Currently these + * bits are only set for case sensitive HFS+ volumes. + */ +#define INC_FOLDERCOUNT(hfsmp, cattr) \ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && \ + (cattr.ca_recflags & kHFSHasFolderCountMask)) { \ + cattr.ca_dircount++; \ + } \ + +#define DEC_FOLDERCOUNT(hfsmp, cattr) \ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && \ + (cattr.ca_recflags & kHFSHasFolderCountMask) && \ + (cattr.ca_dircount > 0)) { \ + cattr.ca_dircount--; \ + } \ + +typedef struct filefork FCB; + +/* + * Macros for creating item names for our special/private directories. + */ +#define MAKE_INODE_NAME(name, size, linkno) \ + (void) snprintf((name), size, "%s%d", HFS_INODE_PREFIX, (linkno)) +#define HFS_INODE_PREFIX_LEN 5 + +#define MAKE_DIRINODE_NAME(name, size, linkno) \ + (void) snprintf((name), size, "%s%d", HFS_DIRINODE_PREFIX, (linkno)) +#define HFS_DIRINODE_PREFIX_LEN 4 + +#define MAKE_DELETED_NAME(NAME, size, FID) \ + (void) snprintf((NAME), size, "%s%d", HFS_DELETE_PREFIX, (FID)) +#define HFS_DELETE_PREFIX_LEN 4 + + +#define HFS_AVERAGE_NAME_SIZE 22 +#define AVERAGE_HFSDIRENTRY_SIZE (8+HFS_AVERAGE_NAME_SIZE+4) + +#define STD_DIRENT_LEN(namlen) \ + ((sizeof(struct dirent) - (NAME_MAX+1)) + (((namlen)+1 + 3) &~ 3)) + +#define EXT_DIRENT_LEN(namlen) \ + ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) + + +enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; + + +/* macro to determine if hfs or hfsplus */ +#define ISHFSPLUS(VCB) ((VCB)->vcbSigWord == kHFSPlusSigWord) +#define ISHFS(VCB) ((VCB)->vcbSigWord == kHFSSigWord) + + +/* + * Various ways to acquire a VFS mount point pointer: + */ +#define VTOVFS(VP) vnode_mount((VP)) +#define HFSTOVFS(HFSMP) ((HFSMP)->hfs_mp) +#define VCBTOVFS(VCB) HFSTOVFS(VCB) + +/* + * Various ways to acquire an HFS mount point pointer: + */ +#define VTOHFS(VP) ((struct hfsmount *)vfs_fsprivate(vnode_mount((VP)))) +#define VFSTOHFS(MP) ((struct hfsmount *)vfs_fsprivate((MP))) +#define VCBTOHFS(VCB) (VCB) +#define FCBTOHFS(FCB) ((struct hfsmount *)vfs_fsprivate(vnode_mount((FCB)->ff_cp->c_vp))) + +/* + * Various ways to acquire a VCB (legacy) pointer: + */ +#define VTOVCB(VP) VTOHFS(VP) +#define VFSTOVCB(MP) VFSTOHFS(MP) +#define HFSTOVCB(HFSMP) (HFSMP) +#define FCBTOVCB(FCB) FCBTOHFS(FCB) + + +#define E_NONE 0 +#define kHFSBlockSize 512 + +/* + * Macros for getting the MDB/VH sector and offset + */ +#define HFS_PRI_SECTOR(blksize) (1024 / (blksize)) +#define HFS_PRI_OFFSET(blksize) ((blksize) > 1024 ? 
1024 : 0) + +#define HFS_ALT_SECTOR(blksize, blkcnt) (((blkcnt) - 1) - (512 / (blksize))) +#define HFS_ALT_OFFSET(blksize) ((blksize) > 1024 ? (blksize) - 1024 : 0) + +/* Convert the logical sector number to be aligned on physical block size boundary. + * We are assuming the partition is a multiple of physical block size. + */ +#define HFS_PHYSBLK_ROUNDDOWN(sector_num, log_per_phys) ((sector_num / log_per_phys) * log_per_phys) + +/* + * HFS specific fcntl()'s + */ +#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004) +#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005) +/* See HFSIOC_EXT_BULKACCESS and friends for HFS specific fsctls*/ + + + +/* + * This is the straight GMT conversion constant: + * 00:00:00 January 1, 1970 - 00:00:00 January 1, 1904 + * (3600 * 24 * ((365 * (1970 - 1904)) + (((1970 - 1904) / 4) + 1))) + */ +#define MAC_GMT_FACTOR 2082844800UL + +static inline __attribute__((const)) +off_t hfs_blk_to_bytes(uint32_t blk, uint32_t blk_size) +{ + return (off_t)blk * blk_size; // Avoid the overflow +} + +/* + * For now, we use EIO to indicate consistency issues. It is safe to + * return or assign an error value to HFS_EINCONSISTENT but it is + * *not* safe to compare against it because EIO can be generated for + * other reasons. We take advantage of the fact that == has + * left-to-right associativity and so any uses of: + * + * if (error == HFS_EINCONSISTENT) + * + * will produce a compiler warning: "comparison between pointer and + * integer". + * + * Note that not everwhere is consistent with the use of + * HFS_EINCONSISTENT. Some places return EINVAL, EIO directly or + * other error codes. + */ +#define HFS_EINCONSISTENT (void *)0 == (void *)0 ? EIO : EIO + +#define HFS_ERESERVEDNAME -8 + +extern int (**hfs_specop_p)(void *); + +/***************************************************************************** + FUNCTION PROTOTYPES +******************************************************************************/ + +/***************************************************************************** + hfs_vnop_xxx functions from different files +******************************************************************************/ +int hfs_vnop_readdirattr(struct vnop_readdirattr_args *); /* in hfs_attrlist.c */ +int hfs_vnop_getattrlistbulk(struct vnop_getattrlistbulk_args *); /* in hfs_attrlist.c */ + +int hfs_vnop_inactive(struct vnop_inactive_args *); /* in hfs_cnode.c */ +int hfs_vnop_reclaim(struct vnop_reclaim_args *); /* in hfs_cnode.c */ + +int hfs_set_backingstore (struct vnode *vp, int val); /* in hfs_cnode.c */ +int hfs_is_backingstore (struct vnode *vp, int *val); /* in hfs_cnode.c */ + +int hfs_vnop_link(struct vnop_link_args *); /* in hfs_link.c */ + +int hfs_vnop_lookup(struct vnop_lookup_args *); /* in hfs_lookup.c */ + +int hfs_vnop_search(struct vnop_searchfs_args *); /* in hfs_search.c */ + +int hfs_vnop_read(struct vnop_read_args *); /* in hfs_readwrite.c */ +int hfs_vnop_write(struct vnop_write_args *); /* in hfs_readwrite.c */ +int hfs_vnop_ioctl(struct vnop_ioctl_args *); /* in hfs_readwrite.c */ +int hfs_vnop_select(struct vnop_select_args *); /* in hfs_readwrite.c */ +int hfs_vnop_strategy(struct vnop_strategy_args *); /* in hfs_readwrite.c */ +int hfs_vnop_allocate(struct vnop_allocate_args *); /* in hfs_readwrite.c */ +int hfs_vnop_pagein(struct vnop_pagein_args *); /* in hfs_readwrite.c */ +int hfs_vnop_pageout(struct vnop_pageout_args *); /* in hfs_readwrite.c */ +int hfs_vnop_bwrite(struct vnop_bwrite_args *); /* in hfs_readwrite.c */ +int 
hfs_vnop_blktooff(struct vnop_blktooff_args *); /* in hfs_readwrite.c */ +int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); /* in hfs_readwrite.c */ +int hfs_vnop_blockmap(struct vnop_blockmap_args *); /* in hfs_readwrite.c */ +errno_t hfs_flush_invalid_ranges(vnode_t vp); /* in hfs_readwrite.c */ + +int hfs_vnop_getxattr(struct vnop_getxattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_setxattr(struct vnop_setxattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_removexattr(struct vnop_removexattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_listxattr(struct vnop_listxattr_args *); /* in hfs_xattr.c */ +#if NAMEDSTREAMS +extern int hfs_vnop_getnamedstream(struct vnop_getnamedstream_args*); +extern int hfs_vnop_makenamedstream(struct vnop_makenamedstream_args*); +extern int hfs_vnop_removenamedstream(struct vnop_removenamedstream_args*); +#endif + + +/***************************************************************************** + Functions from MacOSStubs.c +******************************************************************************/ +time_t to_bsd_time(u_int32_t hfs_time); + +u_int32_t to_hfs_time(time_t bsd_time); + + +/***************************************************************************** + Functions from hfs_notifications.c +******************************************************************************/ +void hfs_generate_volume_notifications(struct hfsmount *hfsmp); + + +/***************************************************************************** + Functions from hfs_readwrite.c +******************************************************************************/ +extern int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); + +/* flags for hfs_pin_block_range() and hfs_pin_vnode() */ +#define HFS_PIN_IT 0x0001 +#define HFS_UNPIN_IT 0x0002 +#define HFS_TEMP_PIN 0x0004 +#define HFS_EVICT_PIN 0x0008 +#define HFS_DATALESS_PIN 0x0010 + +// +// pin/un-pin an explicit range of blocks to the "fast" (usually ssd) device +// +int hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks); + +// +// pin/un-pin all the extents belonging to a vnode. 
+// also, if it is non-null, "num_blocks_pinned" returns the number of blocks pin/unpinned by the function +// +int hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned); + + +int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, uint8_t forktype, uint32_t *pinned); + + +/* Flags for HFS truncate */ +#define HFS_TRUNCATE_SKIPTIMES 0x00000002 /* implied by skipupdate; it is a subset */ + + +extern int hfs_truncate(struct vnode *, off_t, int, int, vfs_context_t); + +extern int hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, + struct filefork *rsrcfork, u_int32_t fileid); + +extern int hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp); + +extern int hfs_bmap(struct vnode *, daddr_t, struct vnode **, daddr64_t *, unsigned int *); + +extern errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock); + + +/***************************************************************************** + Functions from hfs_resize.c +******************************************************************************/ +int hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context); +int hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context); + + +/***************************************************************************** + Functions from hfs_vfsops.c +******************************************************************************/ + +extern void hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result); + +/* used as a callback by the journaling code */ +extern void hfs_sync_metadata(void *arg); + +extern int hfs_vget(struct hfsmount *, cnid_t, struct vnode **, int, int); + +extern void hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding); + +enum volop {VOL_UPDATE, VOL_MKDIR, VOL_RMDIR, VOL_MKFILE, VOL_RMFILE}; +extern int hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot); + +enum { + HFS_FVH_WAIT = 0x0001, + HFS_FVH_WRITE_ALT = 0x0002, + HFS_FVH_FLUSH_IF_DIRTY = 0x0004, +}; +typedef uint32_t hfs_flush_volume_header_options_t; +int hfs_flushvolumeheader(struct hfsmount *hfsmp, hfs_flush_volume_header_options_t); + +extern int hfs_extendfs(struct hfsmount *, u_int64_t, vfs_context_t); +extern int hfs_truncatefs(struct hfsmount *, u_int64_t, vfs_context_t); +extern int hfs_resize_progress(struct hfsmount *, u_int32_t *); + +/* If a runtime corruption is detected, mark the volume inconsistent + * bit in the volume attributes. 
+ */ + +typedef enum { + HFS_INCONSISTENCY_DETECTED, + + // Used when unable to rollback an operation that failed + HFS_ROLLBACK_FAILED, + + // Used when the latter part of an operation failed, but we chose not to roll back + HFS_OP_INCOMPLETE, + + // Used when someone told us to force an fsck on next mount + HFS_FSCK_FORCED, +} hfs_inconsistency_reason_t; + +void hfs_mark_inconsistent(struct hfsmount *hfsmp, + hfs_inconsistency_reason_t reason); + +void hfs_scan_blocks (struct hfsmount *hfsmp); +int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context); + +/***************************************************************************** + Functions from hfs_vfsutils.c +******************************************************************************/ +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t baseMultiple); + +#if CONFIG_HFS_STD +OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, + struct proc *p); +#endif +OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, + off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args, kauth_cred_t cred); + +OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp); + +extern int hfsUnmount(struct hfsmount *hfsmp, struct proc *p); + +extern bool overflow_extents(struct filefork *fp); + +extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, + struct proc *p, int invokesuperuserstatus); + +extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type); +extern int hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid); +extern void hfs_pin_fs_metadata(struct hfsmount *hfsmp); + +/* Return information about number of metadata blocks for volume */ +extern int hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo); + +/* + * Journal lock function prototypes + */ +int hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype); +void hfs_unlock_global (struct hfsmount *hfsmp); + +/* HFS mount lock/unlock prototypes */ +void hfs_lock_mount (struct hfsmount *hfsmp); +void hfs_unlock_mount (struct hfsmount *hfsmp); + + +/* HFS System file locking */ +#define SFL_CATALOG 0x0001 +#define SFL_EXTENTS 0x0002 +#define SFL_BITMAP 0x0004 +#define SFL_ATTRIBUTE 0x0008 +#define SFL_STARTUP 0x0010 +#define SFL_VM_PRIV 0x0020 +#define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | SFL_STARTUP | SFL_VM_PRIV) + +extern u_int32_t GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, + struct cat_attr *fattr, struct cat_fork *forkinfo); + +extern void hfs_remove_orphans(struct hfsmount *); + +u_int32_t GetLogicalBlockSize(struct vnode *vp); + +extern u_int32_t hfs_free_cnids(struct hfsmount * hfsmp); +extern u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve); + +short MacToVFSError(OSErr err); + +void hfs_metadatazone_init(struct hfsmount *hfsmp, int disable); + +/* HFS directory hint functions. 
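+ * (Editor's note, added for exposition and not part of the original change:
+ * a directoryhint_t caches the position reached in a large directory between
+ * successive readdir/readdirattr calls, so enumeration can resume where it
+ * left off instead of rescanning the catalog from the first entry.)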
*/ +extern directoryhint_t * hfs_getdirhint(struct cnode *, int, int); +extern void hfs_reldirhint(struct cnode *, directoryhint_t *); +extern void hfs_reldirhints(struct cnode *, int); +extern void hfs_insertdirhint(struct cnode *, directoryhint_t *); + +extern int hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2); + +extern int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, + void *_args, off_t embeddedOffset, daddr64_t mdb_offset, + HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred); + +extern int hfs_virtualmetafile(struct cnode *); + +extern int hfs_start_transaction(struct hfsmount *hfsmp); +extern int hfs_end_transaction(struct hfsmount *hfsmp); +extern void hfs_journal_lock(struct hfsmount *hfsmp); +extern void hfs_journal_unlock(struct hfsmount *hfsmp); +extern void hfs_syncer_lock(struct hfsmount *hfsmp); +extern void hfs_syncer_unlock(struct hfsmount *hfsmp); +extern void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts); +extern void hfs_syncer_wakeup(struct hfsmount *hfsmp); +extern void hfs_syncer(void *arg, wait_result_t); +extern void hfs_sync_ejectable(struct hfsmount *hfsmp); + +typedef enum hfs_flush_mode { + HFS_FLUSH_JOURNAL, // Flush journal + HFS_FLUSH_JOURNAL_META, // Flush journal and metadata blocks + HFS_FLUSH_FULL, // Flush journal and does a cache flush + HFS_FLUSH_CACHE, // Flush track cache to media + HFS_FLUSH_BARRIER, // Barrier-only flush to ensure write order + HFS_FLUSH_JOURNAL_BARRIER // Flush journal with barrier +} hfs_flush_mode_t; + +extern errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode); + +extern void hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents); + +/* Erase unused Catalog nodes due to . */ +extern int hfs_erase_unused_nodes(struct hfsmount *hfsmp); + +extern uint64_t hfs_usecs_to_deadline(uint64_t usecs); + +extern int hfs_freeze(struct hfsmount *hfsmp); +extern int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process); + +void hfs_close_jvp(hfsmount_t *hfsmp); + +// Return a heap address suitable for logging or tracing +uintptr_t obfuscate_addr(void *addr); + +#if CONFIG_HFS_STD +int hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, + ByteCount *actualDstLen, unsigned char* dstStr); +int utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, + Str31 dstStr); +int unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry); +#endif + +void *hfs_malloc(size_t size); +void hfs_free(void *ptr, size_t size); +void *hfs_mallocz(size_t size); + +typedef enum { + HFS_CNODE_ZONE, + HFS_FILEFORK_ZONE, + HFS_DIRHINT_ZONE, + HFS_NUM_ZONES +} hfs_zone_kind_t; + +typedef struct hfs_zone_entry { + hfs_zone_kind_t hze_kind; + size_t hze_elem_size; + const char * hze_name; + boolean_t hze_noencrypt; +} hfs_zone_entry_t; + +typedef struct hfs_zone { + zone_t hz_zone; + size_t hz_elem_size; +} hfs_zone_t; + +void hfs_init_zones(void); +void *hfs_zalloc(hfs_zone_kind_t type); +void hfs_zfree(void *ptr, hfs_zone_kind_t type); + +void hfs_sysctl_register(void); +void hfs_sysctl_unregister(void); + +#if HFS_MALLOC_DEBUG + +void hfs_alloc_trace_disable(void); +void hfs_alloc_trace_enable(void); +bool hfs_dump_allocations(void); + +#endif // HFS_MALLOC_DEBUG + +/***************************************************************************** + Functions from hfs_vnops.c +******************************************************************************/ +int 
hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags); + +int hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p); + +int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, struct proc *p); + +int hfs_vnop_create(struct vnop_create_args *ap); + +int hfs_vnop_remove(struct vnop_remove_args*); + +#define kMaxSecsForFsync 5 +#define HFS_SYNCTRANS 1 +extern int hfs_btsync(struct vnode *vp, int sync_transaction); + +extern void replace_desc(struct cnode *cp, struct cat_desc *cdp); + +extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, + struct vnode **rvpp); + +typedef enum { + // Push all modifications to disk (including minor ones) + HFS_UPDATE_FORCE = 0x01, +} hfs_update_options_t; + +extern int hfs_update(struct vnode *, int options); + +typedef enum hfs_sync_mode { + HFS_FSYNC, + HFS_FSYNC_FULL, + HFS_FSYNC_BARRIER +} hfs_fsync_mode_t; + +extern int hfs_fsync(struct vnode *, int, hfs_fsync_mode_t, struct proc *); + +const struct cat_fork * +hfs_prepare_fork_for_update(filefork_t *ff, + const struct cat_fork *cf, + struct cat_fork *cf_buf, + uint32_t block_size); + +struct decmpfs_cnode; +struct decmpfs_cnode *hfs_lazy_init_decmpfs_cnode (struct cnode *cp); + +/***************************************************************************** + Functions from hfs_xattr.c +******************************************************************************/ + +/* + * Maximum extended attribute size supported for all extended attributes except + * resource fork and finder info. + */ +#define HFS_XATTR_MAXSIZE INT32_MAX + +/* Number of bits used to represent maximum extended attribute size */ +#define HFS_XATTR_SIZE_BITS 31 + +int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey); +int hfs_buildattrkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key); +void hfs_xattr_init(struct hfsmount * hfsmp); +int file_attribute_exist(struct hfsmount *hfsmp, uint32_t fileID); +int init_attrdata_vnode(struct hfsmount *hfsmp); +int hfs_xattr_read(vnode_t vp, const char *name, void *data, size_t *size); +int hfs_getxattr_internal(cnode_t *, struct vnop_getxattr_args *, + struct hfsmount *, u_int32_t); +int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size); +int hfs_setxattr_internal(struct cnode *, const void *, size_t, + struct vnop_setxattr_args *, struct hfsmount *, u_int32_t); +extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, + bool *open_transaction); + +int hfs_removexattr_by_id (struct hfsmount *hfsmp, uint32_t fileid, const char *xattr_name ); + +extern int hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state); + + + +/***************************************************************************** + Functions from hfs_link.c +******************************************************************************/ + +extern int hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, + struct componentname *cnp, int skip_reserve); +extern int hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *prevlinkid, cnid_t *nextlinkid); +extern int hfs_lookup_lastlink(struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *nextlinkid, struct cat_desc *cdesc); +extern void hfs_privatedir_init(struct hfsmount *, enum privdirtype); + +extern void hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid); +extern void hfs_relorigins(struct cnode *cp); +extern void hfs_relorigin(struct cnode *cp, cnid_t parentcnid); 
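+/*
+ * Illustrative usage sketch, added for exposition; it is not part of the
+ * original change, elides locking, and the directory cnode 'dcp' is
+ * hypothetical. The link-origin helpers declared nearby let a cnode with
+ * multiple hard links remember which parent directory it was most recently
+ * reached through, so that ".." and path reconstruction resolve to that
+ * parent:
+ *
+ *     hfs_savelinkorigin(cp, dcp->c_cnid);        // remember this parent
+ *     ...
+ *     if (hfs_haslinkorigin(cp))
+ *         parent = hfs_currentparent(cp, false);  // have_lock == false
+ *     ...
+ *     hfs_relorigin(cp, dcp->c_cnid);             // drop the stale origin
+ */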
+extern int hfs_haslinkorigin(cnode_t *cp); +extern cnid_t hfs_currentparent(cnode_t *cp, bool have_lock); +extern cnid_t hfs_currentcnid(cnode_t *cp); +errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id); + + +/***************************************************************************** + Functions from VolumeAllocation.c + ******************************************************************************/ +extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks); + +extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, + u_int32_t numBlocks, u_int32_t *alloc_count); + +extern int hfs_isrbtree_active (struct hfsmount *hfsmp); + +/***************************************************************************** + Functions from hfs_fsinfo.c + ******************************************************************************/ +extern errno_t hfs_get_fsinfo(struct hfsmount *hfsmp, void *a_data); +extern void hfs_fsinfo_data_add(struct hfs_fsinfo_data *fsinfo, uint64_t entry); + +struct hfs_sysctl_chain { + struct sysctl_oid *oid; + struct hfs_sysctl_chain *next; +}; + +extern struct hfs_sysctl_chain *sysctl_list; + +SYSCTL_DECL(_vfs_generic_hfs); + +#define HFS_SYSCTL(kind, parent, flags, name, ...) \ + SYSCTL_##kind(parent, flags, name, __VA_ARGS__); \ + struct hfs_sysctl_chain hfs_sysctl_##parent##_##name##_chain = { \ + .oid = &sysctl_##parent##_##name \ + }; \ + static __attribute__((__constructor__)) void \ + hfs_sysctl_register_##parent##_##name(void) { \ + hfs_sysctl_##parent##_##name##_chain.next = sysctl_list; \ + sysctl_list = &hfs_sysctl_##parent##_##name##_chain; \ + } + +__END_DECLS + +#undef assert +#define assert Do_not_use_assert__Use_hfs_assert_instead + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __HFS__ */ diff --git a/core/hfs_alloc_trace.h b/core/hfs_alloc_trace.h new file mode 100644 index 0000000..304a194 --- /dev/null +++ b/core/hfs_alloc_trace.h @@ -0,0 +1,34 @@ +// +// hfs_alloc_trace.h +// hfs +// +// Created by Chris Suter on 8/19/15. +// +// + +#ifndef hfs_alloc_trace_h +#define hfs_alloc_trace_h + +#include +#include + +enum { + HFS_ALLOC_BACKTRACE_LEN = 4, +}; + +#pragma pack(push, 8) + +struct hfs_alloc_trace_info { + int entry_count; + bool more; + struct hfs_alloc_info_entry { + uint64_t ptr; + uint64_t sequence; + uint64_t size; + uint64_t backtrace[HFS_ALLOC_BACKTRACE_LEN]; + } entries[]; +}; + +#pragma pack(pop) + +#endif /* hfs_alloc_trace_h */ diff --git a/core/hfs_attrlist.c b/core/hfs_attrlist.c new file mode 100644 index 0000000..1fa4268 --- /dev/null +++ b/core/hfs_attrlist.c @@ -0,0 +1,1743 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * hfs_attrlist.c - HFS attribute list processing + * + * Copyright (c) 1998-2002, Apple Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "hfs.h" +#include "hfs_cnode.h" +#include "hfs_mount.h" +#include "hfs_dbg.h" +#include "hfs_attrlist.h" +#include "hfs_btreeio.h" +#include "hfs_cprotect.h" + +/* Packing routines: */ + +static void packnameattr(struct attrblock *abp, struct vnode *vp, + const u_int8_t *name, int namelen); + +static void packcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, + struct vnode *vp, struct cat_desc * cdp, + struct cat_attr * cap, struct vfs_context *ctx); + +static void packfileattr(struct attrblock *abp, struct hfsmount *hfsmp, + struct cat_attr *cattrp, struct cat_fork *datafork, + struct cat_fork *rsrcfork, struct vnode *vp); + +static void packdirattr(struct attrblock *abp, struct hfsmount *hfsmp, + struct vnode *vp, struct cat_desc * descp, + struct cat_attr * cattrp); + +static u_int32_t hfs_real_user_access(vnode_t vp, vfs_context_t ctx); + +static void get_vattr_data_for_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *, + struct cat_fork *, struct cat_fork *, vfs_context_t); + +static void vattr_data_for_common_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *, + vfs_context_t); + +static void vattr_data_for_dir_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct vnode *, struct cat_desc *, struct cat_attr *); + +static void vattr_data_for_file_attrs(struct attrlist *, struct vnode_attr *, + struct hfsmount *, struct cat_attr *, struct cat_fork *, struct cat_fork *, + struct vnode *vp); + +static int hfs_readdirattr_internal(struct vnode *, struct attrlist *, + struct vnode_attr *, uio_t, uint64_t, int, uint32_t *, int *, int *, + vfs_context_t); + +/* + * readdirattr operation will return attributes for the items in the + * directory specified. + * + * It does not do . and .. entries. The problem is if you are at the root of the + * hfs directory and go to .. you could be crossing a mountpoint into a + * different (ufs) file system. The attributes that apply for it may not + * apply for the file system you are doing the readdirattr on. To make life + * simpler, this call will only return entries in its directory, hfs like. + */ +int +hfs_vnop_readdirattr(ap) + struct vnop_readdirattr_args /* { + struct vnode *a_vp; + struct attrlist *a_alist; + struct uio *a_uio; + u_long a_maxcount; + u_long a_options; + u_long *a_newstate; + int *a_eofflag; + u_long *a_actualcount; + vfs_context_t a_context; + } */ *ap; +{ + int error; + struct attrlist *alist = ap->a_alist; + + /* Check for invalid options and buffer space. 
*/
+ if (((ap->a_options & ~(FSOPT_NOINMEMUPDATE | FSOPT_NOFOLLOW)) != 0) ||
+ (ap->a_maxcount <= 0)) {
+ return (EINVAL);
+ }
+ /*
+ * Reject requests for unsupported attributes.
+ */
+ if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) ||
+ (alist->commonattr & ~HFS_ATTR_CMN_VALID) ||
+ (alist->volattr != 0) ||
+ (alist->dirattr & ~HFS_ATTR_DIR_VALID) ||
+ (alist->fileattr & ~HFS_ATTR_FILE_VALID) ||
+ (alist->forkattr != 0)) {
+ return (EINVAL);
+ }
+
+ error = hfs_readdirattr_internal(ap->a_vp, alist, NULL, ap->a_uio,
+ (uint64_t)ap->a_options, ap->a_maxcount, ap->a_newstate,
+ ap->a_eofflag, (int *)ap->a_actualcount, ap->a_context);
+
+ return (error);
+}
+
+
+/*
+ * getattrlistbulk, like readdirattr, will return attributes for the items in
+ * the directory specified.
+ *
+ * It does not do . and .. entries. The problem is if you are at the root of the
+ * hfs directory and go to .. you could be crossing a mountpoint into a
+ * different (ufs) file system. The attributes that apply for it may not
+ * apply for the file system you are doing the readdirattr on. To make life
+ * simpler, this call will only return entries in its directory, hfs like.
+ */
+int
+hfs_vnop_getattrlistbulk(ap)
+ struct vnop_getattrlistbulk_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_vp;
+ struct attrlist *a_alist;
+ struct vnode_attr *a_vap;
+ struct uio *a_uio;
+ void *a_private;
+ uint64_t a_options;
+ int32_t *a_eofflag;
+ int32_t *a_actualcount;
+ vfs_context_t a_context;
+ } */ *ap;
+{
+ int error = 0;
+
+ error = hfs_readdirattr_internal(ap->a_vp, ap->a_alist, ap->a_vap,
+ ap->a_uio, (uint64_t)ap->a_options, 0, NULL, ap->a_eofflag,
+ (int *)ap->a_actualcount, ap->a_context);
+
+ return (error);
+}
+
+/*
+ * Common function for both hfs_vnop_readdirattr and hfs_vnop_getattrlistbulk.
+ * This either fills in a vnode_attr structure or fills in an attribute buffer.
+ * Currently the difference in behaviour required for the two vnops is keyed
+ * on whether the passed in vnode_attr pointer is null or not. If the pointer
+ * is null we fill in the buffer passed, and if it is not null we fill in the fields
+ * of the vnode_attr structure.
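+ *
+ * For example (a sketch of the two call sites above, with trailing argument
+ * names abbreviated): hfs_vnop_readdirattr passes a NULL vnode_attr pointer,
+ *
+ *     hfs_readdirattr_internal(vp, alist, NULL, uio, options, maxcount, ...);
+ *
+ * and therefore packs the legacy attribute buffer, while
+ * hfs_vnop_getattrlistbulk passes the caller's vnode_attr,
+ *
+ *     hfs_readdirattr_internal(vp, alist, ap->a_vap, uio, options, 0, NULL, ...);
+ *
+ * and therefore fills in vnode_attr fields for vfs_attr_pack().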
+ */
+int
+hfs_readdirattr_internal(struct vnode *dvp, struct attrlist *alist,
+ struct vnode_attr *vap, uio_t uio, uint64_t options, int maxcount,
+ uint32_t *newstate, int *eofflag, int *actualcount, vfs_context_t ctx)
+{
+ struct cnode *dcp;
+ struct hfsmount * hfsmp;
+ u_int32_t fixedblocksize;
+ u_int32_t maxattrblocksize = 0;
+ u_int32_t currattrbufsize;
+ void *attrbufptr = NULL;
+ void *attrptr = NULL;
+ void *varptr = NULL;
+ caddr_t namebuf = NULL;
+ struct attrblock attrblk;
+ int error = 0;
+ int index = 0;
+ int i = 0;
+ struct cat_desc *lastdescp = NULL;
+ struct cat_entrylist *ce_list = NULL;
+ directoryhint_t *dirhint = NULL;
+ unsigned int tag;
+ int maxentries = 0;
+ int lockflags;
+ u_int32_t dirchg = 0;
+ int reachedeof = 0;
+ int internal_actualcount;
+ int internal_eofflag;
+
+ /* Let's make sure we always have something assigned to actualcount; minimal change required */
+ if (actualcount == NULL) {
+ actualcount = &internal_actualcount;
+ }
+ /* Let's make sure we always have something assigned to eofflag; minimal change required */
+ if (eofflag == NULL) {
+ eofflag = &internal_eofflag;
+ }
+
+ *(actualcount) = 0;
+ *(eofflag) = 0;
+
+ if ((uio_resid(uio) <= 0) || (uio_iovcnt(uio) > 1))
+ return (EINVAL);
+
+ if (VTOC(dvp)->c_bsdflags & UF_COMPRESSED) {
+ int compressed = hfs_file_is_compressed(VTOC(dvp), 0); /* 0 == take the cnode lock */
+
+ if (!compressed) {
+ error = check_for_dataless_file(dvp, NAMESPACE_HANDLER_READ_OP);
+ if (error) {
+ return error;
+ }
+ }
+ }
+
+ /*
+ * Take an exclusive directory lock since we manipulate the directory hints
+ */
+ if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+ return (error);
+ }
+ dcp = VTOC(dvp);
+ hfsmp = VTOHFS(dvp);
+
+ dirchg = dcp->c_dirchangecnt;
+
+ /* Extract directory index and tag (sequence number) from uio_offset */
+ index = uio_offset(uio) & HFS_INDEX_MASK;
+ tag = uio_offset(uio) & ~HFS_INDEX_MASK;
+
+ /*
+ * We can't just use the valence as an optimization to avoid
+ * going to the catalog. It might be wrong (== 0), and that would
+ * cause us to avoid iterating the directory when it might actually have
+ * contents. Instead, use the catalog to tell us when we've hit EOF
+ * for this directory.
+ */
+
+ /* Get a buffer to hold packed attributes. */
+ fixedblocksize = (sizeof(u_int32_t) + hfs_attrblksize(alist)); /* 4 bytes for length */
+
+ if (!vap) {
+ maxattrblocksize = fixedblocksize;
+ if (alist->commonattr & ATTR_CMN_NAME)
+ maxattrblocksize += kHFSPlusMaxFileNameBytes + 1;
+
+ attrbufptr = hfs_malloc(maxattrblocksize);
+ attrptr = attrbufptr;
+ varptr = (char *)attrbufptr + fixedblocksize; /* Point to variable-length storage */
+ } else {
+ if ((alist->commonattr & ATTR_CMN_NAME) && !vap->va_name) {
+ namebuf = hfs_malloc(MAXPATHLEN);
+ if (!namebuf) {
+ error = ENOMEM;
+ goto exit2;
+ }
+ vap->va_name = namebuf;
+ }
+ }
+ /* Get a detached directory hint (cnode must be locked exclusive) */
+ dirhint = hfs_getdirhint(dcp, ((index - 1) & HFS_INDEX_MASK) | tag, TRUE);
+
+ /* Hide tag from catalog layer. */
+ dirhint->dh_index &= HFS_INDEX_MASK;
+ if (dirhint->dh_index == HFS_INDEX_MASK) {
+ dirhint->dh_index = -1;
+ }
+
+ /*
+ * Obtain a list of catalog entries and pack their attributes until
+ * the output buffer is full or maxcount entries have been packed.
+ */
+
+ /*
+ * Constrain our list size.
+ */ + maxentries = uio_resid(uio) / (fixedblocksize + HFS_AVERAGE_NAME_SIZE); + /* There is maxcount for the bulk vnop */ + if (!vap) + maxentries = min(maxentries, maxcount); + maxentries = min(maxentries, MAXCATENTRIES); + if (maxentries < 1) { + error = EINVAL; + goto exit2; + } + + /* Initialize a catalog entry list. */ + ce_list = hfs_mallocz(CE_LIST_SIZE(maxentries)); + ce_list->maxentries = maxentries; + + /* + * Populate the ce_list from the catalog file. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_getentriesattr(hfsmp, dirhint, ce_list, &reachedeof); + /* Don't forget to release the descriptors later! */ + + hfs_systemfile_unlock(hfsmp, lockflags); + + if ((error == ENOENT) || (reachedeof != 0)) { + *(eofflag) = TRUE; + error = 0; + } + if (error) { + goto exit1; + } + + dcp->c_touch_acctime = TRUE; + + /* + * Check for a FS corruption in the valence. We're holding the cnode lock + * exclusive since we need to serialize the directory hints, so if we found + * that the valence reported 0, but we actually found some items here, then + * silently minimally self-heal and bump the valence to 1. + */ + if ((dcp->c_entries == 0) && (ce_list->realentries > 0)) { + dcp->c_entries++; + dcp->c_flag |= C_MODIFIED; + printf("%s : repairing valence to non-zero!\n", __FUNCTION__); + /* force an update on dcp while we're still holding the lock. */ + hfs_update(dvp, 0); + } + + /* + * Drop the directory lock so we don't deadlock when we: + * - acquire a child cnode lock + * - make calls to vnode_authorize() + * - make calls to kauth_cred_ismember_gid() + */ + hfs_unlock(dcp); + dcp = NULL; + + /* Process the catalog entries. */ + for (i = 0; i < (int)ce_list->realentries; ++i) { + struct cnode *cp = NULL; + struct vnode *vp = NULL; + struct cat_desc * cdescp; + struct cat_attr * cattrp; + struct cat_fork c_datafork; + struct cat_fork c_rsrcfork; + + bzero(&c_datafork, sizeof(c_datafork)); + bzero(&c_rsrcfork, sizeof(c_rsrcfork)); + cdescp = &ce_list->entry[i].ce_desc; + cattrp = &ce_list->entry[i].ce_attr; + c_datafork.cf_size = ce_list->entry[i].ce_datasize; + c_datafork.cf_blocks = ce_list->entry[i].ce_datablks; + c_rsrcfork.cf_size = ce_list->entry[i].ce_rsrcsize; + c_rsrcfork.cf_blocks = ce_list->entry[i].ce_rsrcblks; + + if (((alist->commonattr & ATTR_CMN_USERACCESS) && + (cattrp->ca_recflags & kHFSHasSecurityMask)) +#if CONFIG_PROTECT + || + ((alist->commonattr & ATTR_CMN_DATA_PROTECT_FLAGS) && (vap)) +#endif + ) { + /* + * Obtain vnode for our vnode_authorize() calls. + */ + if (hfs_vget(hfsmp, cattrp->ca_fileid, &vp, 0, 0) != 0) { + vp = NULL; + } + } else if (vap || !(options & FSOPT_NOINMEMUPDATE)) { + /* Get in-memory cnode data (if any). */ + vp = hfs_chash_getvnode(hfsmp, cattrp->ca_fileid, 0, 0, 0); + } + if (vp != NULL) { + cp = VTOC(vp); + /* Only use cnode's decriptor for non-hardlinks */ + if (!(cp->c_flag & C_HARDLINK)) + cdescp = &cp->c_desc; + cattrp = &cp->c_attr; + if (cp->c_datafork) { + c_datafork.cf_size = cp->c_datafork->ff_size; + c_datafork.cf_blocks = cp->c_datafork->ff_blocks; + } + if (cp->c_rsrcfork) { + c_rsrcfork.cf_size = cp->c_rsrcfork->ff_size; + c_rsrcfork.cf_blocks = cp->c_rsrcfork->ff_blocks; + } + /* All done with cnode. 
*/ + hfs_unlock(cp); + cp = NULL; + } + + if (!vap) { + *((u_int32_t *)attrptr) = 0; + attrptr = ((u_int32_t *)attrptr) + 1; + attrblk.ab_attrlist = alist; + attrblk.ab_attrbufpp = &attrptr; + attrblk.ab_varbufpp = &varptr; + attrblk.ab_flags = 0; + attrblk.ab_blocksize = maxattrblocksize; + attrblk.ab_context = ctx; + + /* Pack catalog entries into attribute buffer. */ + hfs_packattrblk(&attrblk, hfsmp, vp, cdescp, cattrp, &c_datafork, &c_rsrcfork, ctx); + currattrbufsize = ((char *)varptr - (char *)attrbufptr); + + /* All done with vnode. */ + if (vp != NULL) { + vnode_put(vp); + vp = NULL; + } + + /* Make sure there's enough buffer space remaining. */ + // LP64todo - fix this! + if (uio_resid(uio) < 0 || + currattrbufsize > (u_int32_t)uio_resid(uio)) { + break; + } else { + *((u_int32_t *)attrbufptr) = currattrbufsize; + error = uiomove((caddr_t)attrbufptr, currattrbufsize, uio); + if (error != E_NONE) { + break; + } + attrptr = attrbufptr; + /* Point to variable-length storage */ + varptr = (char *)attrbufptr + fixedblocksize; + /* Save the last valid catalog entry */ + lastdescp = &ce_list->entry[i].ce_desc; + index++; + *actualcount += 1; + + /* Termination checks */ + if ((--maxcount <= 0) || + // LP64todo - fix this! + uio_resid(uio) < 0 || + ((u_int32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE))){ + break; + } + } + } else { + size_t orig_resid = (size_t)uio_resid(uio); + size_t resid; + + get_vattr_data_for_attrs(alist, vap, hfsmp, vp, cdescp, + cattrp, &c_datafork, &c_rsrcfork, ctx); + +#if CONFIG_PROTECT + if ((alist->commonattr & ATTR_CMN_DATA_PROTECT_FLAGS) && + vp) { + cp_key_class_t class; + + if (!cp_vnode_getclass(vp, &class)) { + VATTR_RETURN(vap, va_dataprotect_class, + (uint32_t)class); + } + } +#endif + error = vfs_attr_pack(vp, uio, alist, options, vap, + NULL, ctx); + + /* All done with vnode. */ + if (vp) { + vnode_put(vp); + vp = NULL; + } + + resid = uio_resid(uio); + + /* Was this entry succesful ? */ + if (error || resid == orig_resid) + break; + + /* Save the last valid catalog entry */ + lastdescp = &ce_list->entry[i].ce_desc; + index++; + *actualcount += 1; + + /* Do we have the bare minimum for the next entry ? */ + if (resid < sizeof(uint32_t)) + break; + } + } /* for each catalog entry */ + + /* + * If we couldn't fit all the entries requested in the user's buffer, + * it's not EOF. + */ + if (*eofflag && (*actualcount < (int)ce_list->realentries)) + *eofflag = 0; + + /* If we skipped catalog entries for reserved files that should + * not be listed in namespace, update the index accordingly. + */ + if (ce_list->skipentries) { + index += ce_list->skipentries; + ce_list->skipentries = 0; + } + + /* + * If there are more entries then save the last name. + * Key this behavior based on whether or not we observed EOFFLAG. + * + * Do not use the valence as a way to determine if we hit EOF, since + * it can be wrong. Use the catalog's output only. 
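+ *
+ * (Sketch of how this saved position is consumed: exit1 below packs it
+ * with uio_setoffset(uio, index | tag), and the next call into this
+ * function recovers it near the top via
+ *     index = uio_offset(uio) & HFS_INDEX_MASK;
+ *     tag = uio_offset(uio) & ~HFS_INDEX_MASK;
+ * using the tag to look up this directory hint again with hfs_getdirhint().)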
+ */ + if ((*(eofflag) == 0) && (lastdescp != NULL)) { + + /* Remember last entry */ + if ((dirhint->dh_desc.cd_flags & CD_HASBUF) && + (dirhint->dh_desc.cd_nameptr != NULL)) { + dirhint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)dirhint->dh_desc.cd_nameptr); + } + if (lastdescp->cd_nameptr != NULL) { + dirhint->dh_desc.cd_namelen = lastdescp->cd_namelen; + dirhint->dh_desc.cd_nameptr = (const u_int8_t *) + vfs_addname((const char *)lastdescp->cd_nameptr, lastdescp->cd_namelen, 0, 0); + dirhint->dh_desc.cd_flags |= CD_HASBUF; + } else { + dirhint->dh_desc.cd_namelen = 0; + dirhint->dh_desc.cd_nameptr = NULL; + } + dirhint->dh_index = index - 1; + dirhint->dh_desc.cd_cnid = lastdescp->cd_cnid; + dirhint->dh_desc.cd_hint = lastdescp->cd_hint; + dirhint->dh_desc.cd_encoding = lastdescp->cd_encoding; + } + + /* All done with the catalog descriptors. */ + for (i = 0; i < (int)ce_list->realentries; ++i) + cat_releasedesc(&ce_list->entry[i].ce_desc); + ce_list->realentries = 0; + + (void) hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + dcp = VTOC(dvp); + +exit1: + /* Pack directory index and tag into uio_offset. */ + while (tag == 0) tag = (++dcp->c_dirhinttag) << HFS_INDEX_BITS; + uio_setoffset(uio, index | tag); + dirhint->dh_index |= tag; + +exit2: + if (newstate) + *newstate = dirchg; + + /* + * Drop directory hint on error or if there are no more entries, + * only if EOF was seen. + */ + if (dirhint) { + if ((error != 0) || *(eofflag)) + hfs_reldirhint(dcp, dirhint); + else + hfs_insertdirhint(dcp, dirhint); + } + if (namebuf) { + hfs_free(namebuf, MAXPATHLEN); + vap->va_name = NULL; + } + if (attrbufptr) + hfs_free(attrbufptr, maxattrblocksize); + if (ce_list) + hfs_free(ce_list, CE_LIST_SIZE(maxentries)); + + if (vap && *actualcount && error) + error = 0; + + hfs_unlock(dcp); + return (error); +} + + +/*==================== Attribute list support routines ====================*/ + +/* + * Pack cnode attributes into an attribute block. + */ +void +hfs_packattrblk(struct attrblock *abp, + struct hfsmount *hfsmp, + struct vnode *vp, + struct cat_desc *descp, + struct cat_attr *attrp, + struct cat_fork *datafork, + struct cat_fork *rsrcfork, + struct vfs_context *ctx) +{ + struct attrlist *attrlistp = abp->ab_attrlist; + + if (attrlistp->commonattr) + packcommonattr(abp, hfsmp, vp, descp, attrp, ctx); + + if (attrlistp->dirattr && S_ISDIR(attrp->ca_mode)) + packdirattr(abp, hfsmp, vp, descp,attrp); + + if (attrlistp->fileattr && !S_ISDIR(attrp->ca_mode)) + packfileattr(abp, hfsmp, attrp, datafork, rsrcfork, vp); +} + +static char* +mountpointname(struct mount *mp) +{ + struct vfsstatfs *vsfs = vfs_statfs(mp); + + size_t namelength = strlen(vsfs->f_mntonname); + int foundchars = 0; + char *c; + + if (namelength == 0) + return (NULL); + + /* + * Look backwards through the name string, looking for + * the first slash encountered (which must precede the + * last part of the pathname). 
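+ *
+ * For example (hypothetical mount path): for a volume mounted at
+ * "/Volumes/blah-1" this returns a pointer to "blah-1"; for a volume
+ * mounted at "/" the backwards scan never sees a non-slash character, so
+ * the loop falls through and f_mntonname itself is returned below.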
+ */
+ for (c = vsfs->f_mntonname + namelength - 1;
+ namelength > 0; --c, --namelength) {
+ if (*c != '/') {
+ foundchars = 1;
+ } else if (foundchars) {
+ return (c + 1);
+ }
+ }
+
+ return vsfs->f_mntonname;
+}
+
+
+static void
+packnameattr(
+ struct attrblock *abp,
+ struct vnode *vp,
+ const u_int8_t *name,
+ int namelen)
+{
+ void *varbufptr;
+ struct attrreference * attr_refptr;
+ char *mpname;
+ size_t mpnamelen;
+ u_int32_t attrlength;
+ u_int8_t empty = 0;
+
+ /* A cnode's name may be incorrect for the root of a mounted
+ * filesystem (it can be mounted on a different directory name
+ * than the name of the volume, such as "blah-1"). So for the
+ * root directory, it's best to return the last element of the
+ * location where the volume's mounted:
+ */
+ if ((vp != NULL) && vnode_isvroot(vp) &&
+ (mpname = mountpointname(vnode_mount(vp)))) {
+ mpnamelen = strlen(mpname);
+
+ /* Trim off any trailing slashes: */
+ while ((mpnamelen > 0) && (mpname[mpnamelen-1] == '/'))
+ --mpnamelen;
+
+ /* If there's anything left, use it instead of the volume's name */
+ if (mpnamelen > 0) {
+ name = (u_int8_t *)mpname;
+ namelen = mpnamelen;
+ }
+ }
+ if (name == NULL) {
+ name = &empty;
+ namelen = 0;
+ }
+
+ varbufptr = *abp->ab_varbufpp;
+ attr_refptr = (struct attrreference *)(*abp->ab_attrbufpp);
+
+ attrlength = namelen + 1;
+ attr_refptr->attr_dataoffset = (char *)varbufptr - (char *)attr_refptr;
+ attr_refptr->attr_length = attrlength;
+ (void) strncpy((char *)varbufptr, (const char *) name, attrlength);
+ /*
+ * Advance beyond the space just allocated and
+ * round up to the next 4-byte boundary:
+ */
+ varbufptr = ((char *)varbufptr) + attrlength + ((4 - (attrlength & 3)) & 3);
+ ++attr_refptr;
+
+ *abp->ab_attrbufpp = attr_refptr;
+ *abp->ab_varbufpp = varbufptr;
+}
+
+static void
+packcommonattr(
+ struct attrblock *abp,
+ struct hfsmount *hfsmp,
+ struct vnode *vp,
+ struct cat_desc * cdp,
+ struct cat_attr * cap,
+ struct vfs_context * ctx)
+{
+ attrgroup_t attr = abp->ab_attrlist->commonattr;
+ struct mount *mp = HFSTOVFS(hfsmp);
+ void *attrbufptr = *abp->ab_attrbufpp;
+ void *varbufptr = *abp->ab_varbufpp;
+ boolean_t is_64_bit = proc_is64bit(vfs_context_proc(ctx));
+ uid_t cuid = 1;
+ int isroot = 0;
+
+ if (attr & (ATTR_CMN_OWNERID | ATTR_CMN_GRPID)) {
+ cuid = kauth_cred_getuid(vfs_context_ucred(ctx));
+ isroot = cuid == 0;
+ }
+
+ if (ATTR_CMN_NAME & attr) {
+ packnameattr(abp, vp, cdp->cd_nameptr, cdp->cd_namelen);
+ attrbufptr = *abp->ab_attrbufpp;
+ varbufptr = *abp->ab_varbufpp;
+ }
+ if (ATTR_CMN_DEVID & attr) {
+ *((dev_t *)attrbufptr) = hfsmp->hfs_raw_dev;
+ attrbufptr = ((dev_t *)attrbufptr) + 1;
+ }
+ if (ATTR_CMN_FSID & attr) {
+ fsid_t fsid;
+
+ fsid.val[0] = hfsmp->hfs_raw_dev;
+ fsid.val[1] = vfs_typenum(mp);
+ *((fsid_t *)attrbufptr) = fsid;
+ attrbufptr = ((fsid_t *)attrbufptr) + 1;
+ }
+ if (ATTR_CMN_OBJTYPE & attr) {
+ *((fsobj_type_t *)attrbufptr) = IFTOVT(cap->ca_mode);
+ attrbufptr = ((fsobj_type_t *)attrbufptr) + 1;
+ }
+ if (ATTR_CMN_OBJTAG & attr) {
+ *((fsobj_tag_t *)attrbufptr) = VT_HFS;
+ attrbufptr = ((fsobj_tag_t *)attrbufptr) + 1;
+ }
+ /*
+ * Exporting file IDs from HFS Plus:
+ *
+ * For "normal" files the c_fileid is the same value as the
+ * c_cnid. But for hard link files, they are different - the
+ * c_cnid belongs to the active directory entry (ie the link)
+ * and the c_fileid is for the actual inode (ie the data file).
+ * + * The stat call (getattr) will always return the c_fileid + * and Carbon APIs, which are hardlink-ignorant, will always + * receive the c_cnid (from getattrlist). + */ + if (ATTR_CMN_OBJID & attr) { + ((fsobj_id_t *)attrbufptr)->fid_objno = cdp->cd_cnid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + if (ATTR_CMN_OBJPERMANENTID & attr) { + ((fsobj_id_t *)attrbufptr)->fid_objno = cdp->cd_cnid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + if (ATTR_CMN_PAROBJID & attr) { + ((fsobj_id_t *)attrbufptr)->fid_objno = cdp->cd_parentcnid; + ((fsobj_id_t *)attrbufptr)->fid_generation = 0; + attrbufptr = ((fsobj_id_t *)attrbufptr) + 1; + } + if (ATTR_CMN_SCRIPT & attr) { + *((text_encoding_t *)attrbufptr) = cdp->cd_encoding; + attrbufptr = ((text_encoding_t *)attrbufptr) + 1; + } + if (ATTR_CMN_CRTIME & attr) { + if (is_64_bit) { + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_itime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; + } + else { + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_itime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; + } + } + if (ATTR_CMN_MODTIME & attr) { + if (is_64_bit) { + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; + } + else { + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; + } + } + if (ATTR_CMN_CHGTIME & attr) { + if (is_64_bit) { + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; + } + else { + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; + } + } + if (ATTR_CMN_ACCTIME & attr) { + if (is_64_bit) { + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; + } + else { + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; + } + } + if (ATTR_CMN_BKUPTIME & attr) { + if (is_64_bit) { + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; + } + else { + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; + } + } + if (ATTR_CMN_FNDRINFO & attr) { + u_int8_t *finfo = NULL; + bcopy(&cap->ca_finderinfo, attrbufptr, sizeof(u_int8_t) * 32); + finfo = (u_int8_t*)attrbufptr; + + /* Don't expose a symlink's private type/creator. 
*/ + if (S_ISLNK(cap->ca_mode)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)attrbufptr; + fip->fdType = 0; + fip->fdCreator = 0; + } + + /* advance 16 bytes into the attrbuf */ + finfo = finfo + 16; + + /* also don't expose the date_added or write_gen_counter fields */ + if (S_ISREG(cap->ca_mode) || S_ISLNK(cap->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + else if (S_ISDIR(cap->ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + + attrbufptr = (char *)attrbufptr + sizeof(u_int8_t) * 32; + } + if (ATTR_CMN_OWNERID & attr) { + uid_t nuid = cap->ca_uid; + + if (!isroot) { + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + nuid = cuid; + else if (nuid == UNKNOWNUID) + nuid = cuid; + } + + *((uid_t *)attrbufptr) = nuid; + attrbufptr = ((uid_t *)attrbufptr) + 1; + } + if (ATTR_CMN_GRPID & attr) { + gid_t ngid = cap->ca_gid; + + if (!isroot) { + gid_t cgid = kauth_cred_getgid(vfs_context_ucred(ctx)); + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + ngid = cgid; + else if (ngid == UNKNOWNUID) + ngid = cgid; + } + + *((gid_t *)attrbufptr) = ngid; + attrbufptr = ((gid_t *)attrbufptr) + 1; + } + if (ATTR_CMN_ACCESSMASK & attr) { + /* + * [2856576] Since we are dynamically changing the owner, also + * effectively turn off the set-user-id and set-group-id bits, + * just like chmod(2) would when changing ownership. This prevents + * a security hole where set-user-id programs run as whoever is + * logged on (or root if nobody is logged in yet!) + */ + *((u_int32_t *)attrbufptr) = (cap->ca_uid == UNKNOWNUID) ? + cap->ca_mode & ~(S_ISUID | S_ISGID) : cap->ca_mode; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_FLAGS & attr) { + *((u_int32_t *)attrbufptr) = cap->ca_flags; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_USERACCESS & attr) { + u_int32_t user_access; + + /* Take the long path when we have an ACL */ + if ((vp != NULLVP) && (cap->ca_recflags & kHFSHasSecurityMask)) { + user_access = hfs_real_user_access(vp, abp->ab_context); + } else { + user_access = DerivePermissionSummary(cap->ca_uid, cap->ca_gid, + cap->ca_mode, mp, vfs_context_ucred(ctx), 0); + } + /* Also consider READ-ONLY file system. 
*/ + if (vfs_flags(mp) & MNT_RDONLY) { + user_access &= ~W_OK; + } + /* Locked objects are not writable either */ + if ((cap->ca_flags & UF_IMMUTABLE) && (vfs_context_suser(abp->ab_context) != 0)) + user_access &= ~W_OK; + if ((cap->ca_flags & SF_IMMUTABLE) && (vfs_context_suser(abp->ab_context) == 0)) + user_access &= ~W_OK; + + *((u_int32_t *)attrbufptr) = user_access; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_CMN_FILEID & attr) { + *((u_int64_t *)attrbufptr) = cap->ca_fileid; + attrbufptr = ((u_int64_t *)attrbufptr) + 1; + } + if (ATTR_CMN_PARENTID & attr) { + *((u_int64_t *)attrbufptr) = cdp->cd_parentcnid; + attrbufptr = ((u_int64_t *)attrbufptr) + 1; + } + + *abp->ab_attrbufpp = attrbufptr; + *abp->ab_varbufpp = varbufptr; +} + +static void +packdirattr( + struct attrblock *abp, + struct hfsmount *hfsmp, + struct vnode *vp, + struct cat_desc * descp, + struct cat_attr * cattrp) +{ + attrgroup_t attr = abp->ab_attrlist->dirattr; + void *attrbufptr = *abp->ab_attrbufpp; + u_int32_t entries; + + /* + * The DIR_LINKCOUNT is the count of real directory hard links. + * (i.e. its not the sum of the implied "." and ".." references + * typically used in stat's st_nlink field) + */ + if (ATTR_DIR_LINKCOUNT & attr) { + *((u_int32_t *)attrbufptr) = cattrp->ca_linkcount; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_DIR_ENTRYCOUNT & attr) { + entries = cattrp->ca_entries; + + if (descp->cd_parentcnid == kHFSRootParentID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->jnl || + ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && + (hfsmp->hfs_flags & HFS_READ_ONLY))) + entries -= 2; /* hide the journal files */ + } + + *((u_int32_t *)attrbufptr) = entries; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_DIR_MOUNTSTATUS & attr) { + if (vp != NULL && vnode_mountedhere(vp) != NULL) + *((u_int32_t *)attrbufptr) = DIR_MNTSTATUS_MNTPOINT; + else + *((u_int32_t *)attrbufptr) = 0; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + *abp->ab_attrbufpp = attrbufptr; +} + +static void +packfileattr( + struct attrblock *abp, + struct hfsmount *hfsmp, + struct cat_attr *cattrp, + struct cat_fork *datafork, + struct cat_fork *rsrcfork, + struct vnode *vp) +{ +#if !HFS_COMPRESSION +#pragma unused(vp) +#endif + attrgroup_t attr = abp->ab_attrlist->fileattr; + void *attrbufptr = *abp->ab_attrbufpp; + void *varbufptr = *abp->ab_varbufpp; + u_int32_t allocblksize; + + allocblksize = HFSTOVCB(hfsmp)->blockSize; + + off_t datasize = datafork->cf_size; + off_t totalsize = datasize + rsrcfork->cf_size; +#if HFS_COMPRESSION + int handle_compressed; + handle_compressed = (cattrp->ca_flags & UF_COMPRESSED);// && hfs_file_is_compressed(VTOC(vp), 1); + + if (handle_compressed) { + if (attr & (ATTR_FILE_DATALENGTH|ATTR_FILE_TOTALSIZE)) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(hfsmp, vp, cattrp->ca_fileid, &datasize, 1) ) { /* 1 == don't take the cnode lock */ + /* total size of a compressed file is just the data size */ + totalsize = datasize; + } + } + } +#endif + + if (ATTR_FILE_LINKCOUNT & attr) { + *((u_int32_t *)attrbufptr) = cattrp->ca_linkcount; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_TOTALSIZE & attr) { + *((off_t *)attrbufptr) = totalsize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_ALLOCSIZE & attr) { + *((off_t *)attrbufptr) = + (off_t)cattrp->ca_blocks * 
(off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_IOBLOCKSIZE & attr) { + *((u_int32_t *)attrbufptr) = hfsmp->hfs_logBlockSize; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_CLUMPSIZE & attr) { + *((u_int32_t *)attrbufptr) = hfsmp->vcbClpSiz; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + if (ATTR_FILE_DEVTYPE & attr) { + if (S_ISBLK(cattrp->ca_mode) || S_ISCHR(cattrp->ca_mode)) + *((u_int32_t *)attrbufptr) = (u_int32_t)cattrp->ca_rdev; + else + *((u_int32_t *)attrbufptr) = 0; + attrbufptr = ((u_int32_t *)attrbufptr) + 1; + } + + if (ATTR_FILE_DATALENGTH & attr) { + *((off_t *)attrbufptr) = datasize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + +#if HFS_COMPRESSION + /* fake the data fork size on a decmpfs compressed file to reflect the + * uncompressed size. This ensures proper reading and copying of these files. + * NOTE: we may need to get the vnode here because the vnode parameter + * passed by hfs_vnop_readdirattr() may be null. + */ + + if ( handle_compressed ) { + if (attr & ATTR_FILE_DATAALLOCSIZE) { + *((off_t *)attrbufptr) = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (attr & ATTR_FILE_RSRCLENGTH) { + *((off_t *)attrbufptr) = 0; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (attr & ATTR_FILE_RSRCALLOCSIZE) { + *((off_t *)attrbufptr) = 0; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + } + else +#endif + { + if (ATTR_FILE_DATAALLOCSIZE & attr) { + *((off_t *)attrbufptr) = (off_t)datafork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_RSRCLENGTH & attr) { + *((off_t *)attrbufptr) = rsrcfork->cf_size; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_RSRCALLOCSIZE & attr) { + *((off_t *)attrbufptr) = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + } + *abp->ab_attrbufpp = attrbufptr; + *abp->ab_varbufpp = varbufptr; +} + +/* + * Calculate the total size of an attribute block. 
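+ *
+ * Worked example (assuming sizeof(struct attrreference) == 8): a request
+ * for ATTR_CMN_NAME | ATTR_CMN_FILEID adds 8 + 8 = 16 bytes here; the
+ * caller (hfs_readdirattr_internal) then adds a 4-byte length prefix,
+ * giving a fixedblocksize of 20, and the name string itself goes into the
+ * variable-length area that follows, 4-byte aligned, with the
+ * attrreference recording its offset and length.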
+ */ +int +hfs_attrblksize(struct attrlist *attrlist) +{ + int size; + attrgroup_t a; + int sizeof_timespec; + boolean_t is_64_bit = proc_is64bit(current_proc()); + + if (is_64_bit) + sizeof_timespec = sizeof(struct user64_timespec); + else + sizeof_timespec = sizeof(struct user32_timespec); + + hfs_assert((attrlist->commonattr & ~ATTR_CMN_VALIDMASK) == 0); + + hfs_assert((attrlist->volattr & ~ATTR_VOL_VALIDMASK) == 0); + + hfs_assert((attrlist->dirattr & ~ATTR_DIR_VALIDMASK) == 0); + + hfs_assert((attrlist->fileattr & ~ATTR_FILE_VALIDMASK) == 0); + + // disable this because it will break the simulator/build machines each + // time a new _CMNEXT_ bit is added + // hfs_assert(((attrlist->forkattr & ~ATTR_FORK_VALIDMASK) == 0) || + // ((attrlist->forkattr & ~ATTR_CMNEXT_VALIDMASK) == 0)); + + size = 0; + + if ((a = attrlist->commonattr) != 0) { + if (a & ATTR_CMN_NAME) size += sizeof(struct attrreference); + if (a & ATTR_CMN_DEVID) size += sizeof(dev_t); + if (a & ATTR_CMN_FSID) size += sizeof(fsid_t); + if (a & ATTR_CMN_OBJTYPE) size += sizeof(fsobj_type_t); + if (a & ATTR_CMN_OBJTAG) size += sizeof(fsobj_tag_t); + if (a & ATTR_CMN_OBJID) size += sizeof(fsobj_id_t); + if (a & ATTR_CMN_OBJPERMANENTID) size += sizeof(fsobj_id_t); + if (a & ATTR_CMN_PAROBJID) size += sizeof(fsobj_id_t); + if (a & ATTR_CMN_SCRIPT) size += sizeof(text_encoding_t); + if (a & ATTR_CMN_CRTIME) size += sizeof_timespec; + if (a & ATTR_CMN_MODTIME) size += sizeof_timespec; + if (a & ATTR_CMN_CHGTIME) size += sizeof_timespec; + if (a & ATTR_CMN_ACCTIME) size += sizeof_timespec; + if (a & ATTR_CMN_BKUPTIME) size += sizeof_timespec; + if (a & ATTR_CMN_FNDRINFO) size += 32 * sizeof(u_int8_t); + if (a & ATTR_CMN_OWNERID) size += sizeof(uid_t); + if (a & ATTR_CMN_GRPID) size += sizeof(gid_t); + if (a & ATTR_CMN_ACCESSMASK) size += sizeof(u_int32_t); + if (a & ATTR_CMN_FLAGS) size += sizeof(u_int32_t); + if (a & ATTR_CMN_USERACCESS) size += sizeof(u_int32_t); + if (a & ATTR_CMN_FILEID) size += sizeof(u_int64_t); + if (a & ATTR_CMN_PARENTID) size += sizeof(u_int64_t); + } + if ((a = attrlist->dirattr) != 0) { + if (a & ATTR_DIR_LINKCOUNT) size += sizeof(u_int32_t); + if (a & ATTR_DIR_ENTRYCOUNT) size += sizeof(u_int32_t); + if (a & ATTR_DIR_MOUNTSTATUS) size += sizeof(u_int32_t); + } + if ((a = attrlist->fileattr) != 0) { + if (a & ATTR_FILE_LINKCOUNT) size += sizeof(u_int32_t); + if (a & ATTR_FILE_TOTALSIZE) size += sizeof(off_t); + if (a & ATTR_FILE_ALLOCSIZE) size += sizeof(off_t); + if (a & ATTR_FILE_IOBLOCKSIZE) size += sizeof(u_int32_t); + if (a & ATTR_FILE_CLUMPSIZE) size += sizeof(u_int32_t); + if (a & ATTR_FILE_DEVTYPE) size += sizeof(u_int32_t); + if (a & ATTR_FILE_DATALENGTH) size += sizeof(off_t); + if (a & ATTR_FILE_DATAALLOCSIZE) size += sizeof(off_t); + if (a & ATTR_FILE_RSRCLENGTH) size += sizeof(off_t); + if (a & ATTR_FILE_RSRCALLOCSIZE) size += sizeof(off_t); + } + + return (size); +} + +#define KAUTH_DIR_WRITE_RIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | \ + KAUTH_VNODE_ADD_SUBDIRECTORY | \ + KAUTH_VNODE_DELETE_CHILD) + +#define KAUTH_DIR_READ_RIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY) + +#define KAUTH_DIR_EXECUTE_RIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH) + +#define KAUTH_FILE_WRITE_RIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA) + +#define KAUTH_FILE_READRIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA) + +#define KAUTH_FILE_EXECUTE_RIGHTS (KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE) + + +/* + * Compute the same [expensive] user_access value as getattrlist does + */ +static 
u_int32_t +hfs_real_user_access(vnode_t vp, vfs_context_t ctx) +{ + u_int32_t user_access = 0; + + if (vnode_isdir(vp)) { + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_WRITE_RIGHTS, ctx) == 0) + user_access |= W_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_READ_RIGHTS, ctx) == 0) + user_access |= R_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_DIR_EXECUTE_RIGHTS, ctx) == 0) + user_access |= X_OK; + } else { + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_WRITE_RIGHTS, ctx) == 0) + user_access |= W_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_READRIGHTS, ctx) == 0) + user_access |= R_OK; + if (vnode_authorize(vp, NULLVP, KAUTH_FILE_EXECUTE_RIGHTS, ctx) == 0) + user_access |= X_OK; + } + return (user_access); +} + + +u_int32_t +DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, + struct mount *mp, kauth_cred_t cred, __unused struct proc *p) +{ + u_int32_t permissions; + + if (obj_uid == UNKNOWNUID) + obj_uid = kauth_cred_getuid(cred); + + /* User id 0 (root) always gets access. */ + if (!suser(cred, NULL)) { + permissions = R_OK | W_OK | X_OK; + goto Exit; + }; + + /* Otherwise, check the owner. */ + if (hfs_owner_rights(VFSTOHFS(mp), obj_uid, cred, NULL, false) == 0) { + permissions = ((u_int32_t)obj_mode & S_IRWXU) >> 6; + goto Exit; + } + + /* Otherwise, check the groups. */ + if (! (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)) { + int is_member; + + if (kauth_cred_ismember_gid(cred, obj_gid, &is_member) == 0 && is_member) { + permissions = ((u_int32_t)obj_mode & S_IRWXG) >> 3; + goto Exit; + } + } + + /* Otherwise, settle for 'others' access. */ + permissions = (u_int32_t)obj_mode & S_IRWXO; + +Exit: + return (permissions); +} + + +/* + * =========================================================================== + * Support functions for filling up a vnode_attr structure based on attributes + * requested. + * =========================================================================== + */ +void +get_vattr_data_for_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc *descp, + struct cat_attr *atrp, struct cat_fork *datafork, struct cat_fork *rsrcfork, + vfs_context_t ctx) +{ + if (alp->commonattr || alp->forkattr) { + vattr_data_for_common_attrs(alp, vap, hfsmp, vp, descp, atrp, + ctx); + } + + if (alp->dirattr && S_ISDIR(atrp->ca_mode)) + vattr_data_for_dir_attrs(alp, vap, hfsmp, vp, descp, atrp); + + if (alp->fileattr && !S_ISDIR(atrp->ca_mode)) { + vattr_data_for_file_attrs(alp, vap, hfsmp, atrp, datafork, + rsrcfork, vp); + } +} + +static void +copy_name_attr(struct vnode_attr *vap, struct vnode *vp, const u_int8_t *name, + int namelen) +{ + char *mpname; + size_t mpnamelen; + u_int32_t attrlength; + u_int8_t empty = 0; + + /* A cnode's name may be incorrect for the root of a mounted + * filesystem (it can be mounted on a different directory name + * than the name of the volume, such as "blah-1"). 
So for the
+ * root directory, it's best to return the last element of the
+ * location where the volume's mounted:
+ */
+ if ((vp != NULL) && vnode_isvroot(vp) &&
+ (mpname = mountpointname(vnode_mount(vp)))) {
+ mpnamelen = strlen(mpname);
+
+ /* Trim off any trailing slashes: */
+ while ((mpnamelen > 0) && (mpname[mpnamelen-1] == '/'))
+ --mpnamelen;
+
+ /* If there's anything left, use it instead of the volume's name */
+ if (mpnamelen > 0) {
+ name = (u_int8_t *)mpname;
+ namelen = mpnamelen;
+ }
+ }
+
+ if (name == NULL) {
+ name = &empty;
+ namelen = 0;
+ }
+
+ attrlength = namelen + 1;
+ (void) strncpy((char *)vap->va_name, (const char *) name, attrlength);
+ /*
+ * round up to 8 and zero out the rounded up bytes.
+ */
+ attrlength = min(kHFSPlusMaxFileNameBytes, ((attrlength + 7) & ~0x07));
+ bzero(vap->va_name + attrlength, kHFSPlusMaxFileNameBytes - attrlength);
+}
+
+static void
+vattr_data_for_common_attrs( struct attrlist *alp, struct vnode_attr *vap,
+ struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc *cdp,
+ struct cat_attr *cap, vfs_context_t ctx)
+{
+ attrgroup_t attr = alp->commonattr;
+ struct mount *mp = HFSTOVFS(hfsmp);
+ uid_t cuid = 1;
+ int isroot = 0;
+
+ if (attr & (ATTR_CMN_OWNERID | ATTR_CMN_GRPID)) {
+ cuid = kauth_cred_getuid(vfs_context_ucred(ctx));
+ isroot = cuid == 0;
+ }
+
+ if (ATTR_CMN_NAME & attr) {
+ if (vap->va_name) {
+ copy_name_attr(vap, vp, cdp->cd_nameptr,
+ cdp->cd_namelen);
+ VATTR_SET_SUPPORTED(vap, va_name);
+ } else {
+ VATTR_CLEAR_SUPPORTED(vap, va_name);
+ }
+ }
+
+ if (ATTR_CMN_DEVID & attr) {
+ vap->va_devid = hfsmp->hfs_raw_dev;
+ VATTR_SET_SUPPORTED(vap, va_devid);
+ }
+
+ if (ATTR_CMN_FSID & attr) {
+ vap->va_fsid64.val[0] = hfsmp->hfs_raw_dev;
+ vap->va_fsid64.val[1] = vfs_typenum(mp);
+ VATTR_SET_SUPPORTED(vap, va_fsid64);
+ }
+ /*
+ * We always provide the objtype even if not asked because VFS helper
+ * functions depend on knowing the object's type.
+ */
+ vap->va_objtype = IFTOVT(cap->ca_mode);
+ VATTR_SET_SUPPORTED(vap, va_objtype);
+
+ if (ATTR_CMN_OBJTAG & attr) {
+ vap->va_objtag = VT_HFS;
+ VATTR_SET_SUPPORTED(vap, va_objtag);
+ }
+ /*
+ * Exporting file IDs from HFS Plus:
+ *
+ * For "normal" files the c_fileid is the same value as the
+ * c_cnid. But for hard link files, they are different - the
+ * c_cnid belongs to the active directory entry (ie the link)
+ * and the c_fileid is for the actual inode (ie the data file).
+ *
+ * The stat call (getattr) will always return the c_fileid
+ * and Carbon APIs, which are hardlink-ignorant, will always
+ * receive the c_cnid (from getattrlist).
+ *
+ * Forkattrs are now repurposed for Common Extended Attributes.
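+ *
+ * Hypothetical illustration (the IDs are invented for the example): an
+ * inode with c_fileid 1234 reached through two hard links with c_cnids
+ * 5678 and 5679 reports va_fileid == 1234 through either link, while
+ * va_linkid (ATTR_CMN_OBJID / ATTR_CMNEXT_LINKID) is 5678 or 5679
+ * depending on which link was used to reach it.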
+ */ + if ((ATTR_CMN_OBJID & attr) || (ATTR_CMN_OBJPERMANENTID & attr) || + alp->forkattr & ATTR_CMNEXT_LINKID) { + vap->va_linkid = cdp->cd_cnid; + VATTR_SET_SUPPORTED(vap, va_linkid); + } + + if (ATTR_CMN_PAROBJID & attr) { + vap->va_parentid = cdp->cd_parentcnid; + VATTR_SET_SUPPORTED(vap, va_parentid); + } + + if (ATTR_CMN_SCRIPT & attr) { + vap->va_encoding = cdp->cd_encoding; + VATTR_SET_SUPPORTED(vap, va_encoding); + } + + if (ATTR_CMN_CRTIME & attr) { + vap->va_create_time.tv_sec = cap->ca_itime; + vap->va_create_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_create_time); + } + + if (ATTR_CMN_MODTIME & attr) { + vap->va_modify_time.tv_sec = cap->ca_mtime; + vap->va_modify_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_modify_time); + } + + if (ATTR_CMN_CHGTIME & attr) { + vap->va_change_time.tv_sec = cap->ca_ctime; + vap->va_change_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_change_time); + } + + if (ATTR_CMN_ACCTIME & attr) { + vap->va_access_time.tv_sec = cap->ca_atime; + vap->va_access_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_access_time); + } + + if (ATTR_CMN_BKUPTIME & attr) { + vap->va_backup_time.tv_sec = cap->ca_btime; + vap->va_backup_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_backup_time); + } + + if (ATTR_CMN_FNDRINFO & attr) { + u_int8_t *finfo = NULL; + + bcopy(&cap->ca_finderinfo, &vap->va_finderinfo[0], + sizeof(u_int8_t) * 32); + finfo = (u_int8_t*)(&vap->va_finderinfo[0]); + + /* Don't expose a symlink's private type/creator. */ + if (S_ISLNK(cap->ca_mode)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)finfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + + /* advance 16 bytes into the attrbuf */ + finfo = finfo + 16; + + /* also don't expose the date_added or write_gen_counter fields */ + if (S_ISREG(cap->ca_mode) || S_ISLNK(cap->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = + (struct FndrExtendedFileInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } else if (S_ISDIR(cap->ca_mode)) { + struct FndrExtendedDirInfo *extinfo = + (struct FndrExtendedDirInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } + + VATTR_SET_SUPPORTED(vap, va_finderinfo); + } + + if (ATTR_CMN_OWNERID & attr) { + uid_t nuid = cap->ca_uid; + + if (!isroot) { + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + nuid = cuid; + else if (nuid == UNKNOWNUID) + nuid = cuid; + } + + vap->va_uid = nuid; + VATTR_SET_SUPPORTED(vap, va_uid); + } + + if (ATTR_CMN_GRPID & attr) { + gid_t ngid = cap->ca_gid; + + if (!isroot) { + gid_t cgid = kauth_cred_getgid(vfs_context_ucred(ctx)); + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) + ngid = cgid; + else if (ngid == UNKNOWNUID) + ngid = cgid; + } + + vap->va_gid = ngid; + VATTR_SET_SUPPORTED(vap, va_gid); + } + + if (ATTR_CMN_ACCESSMASK & attr) { + uint32_t nmode; + /* + * [2856576] Since we are dynamically changing the owner, also + * effectively turn off the set-user-id and set-group-id bits, + * just like chmod(2) would when changing ownership. This prevents + * a security hole where set-user-id programs run as whoever is + * logged on (or root if nobody is logged in yet!) + */ + nmode = (cap->ca_uid == UNKNOWNUID) ? 
+ cap->ca_mode & ~(S_ISUID | S_ISGID) : cap->ca_mode; + + vap->va_mode = nmode; + VATTR_SET_SUPPORTED(vap, va_mode); + } + + if (ATTR_CMN_FLAGS & attr) { + vap->va_flags = cap->ca_flags; + VATTR_SET_SUPPORTED(vap, va_flags); + } + + if (ATTR_CMN_GEN_COUNT & attr) { + vap->va_write_gencount = hfs_get_gencount_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + VATTR_SET_SUPPORTED(vap, va_write_gencount); + } + + if (ATTR_CMN_DOCUMENT_ID & attr) { + vap->va_document_id = hfs_get_document_id_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + VATTR_SET_SUPPORTED(vap, va_document_id); + } + + if (ATTR_CMN_USERACCESS & attr) { + u_int32_t user_access; + + /* Take the long path when we have an ACL */ + if ((vp != NULLVP) && (cap->ca_recflags & kHFSHasSecurityMask)) { + user_access = hfs_real_user_access(vp, ctx); + } else { + user_access = DerivePermissionSummary(cap->ca_uid, cap->ca_gid, + cap->ca_mode, mp, vfs_context_ucred(ctx), 0); + } + /* Also consider READ-ONLY file system. */ + if (vfs_flags(mp) & MNT_RDONLY) { + user_access &= ~W_OK; + } + /* Locked objects are not writable either */ + if ((cap->ca_flags & UF_IMMUTABLE) && (vfs_context_suser(ctx) != 0)) + user_access &= ~W_OK; + if ((cap->ca_flags & SF_IMMUTABLE) && (vfs_context_suser(ctx) == 0)) + user_access &= ~W_OK; + + vap->va_user_access = user_access; + VATTR_SET_SUPPORTED(vap, va_user_access); + } + + /* + * Right now the best we can do is tell if we *don't* have extended + * security (like hfs_vnop_getattr). + */ + if (ATTR_CMN_EXTENDED_SECURITY & attr) { + if (!(cap->ca_recflags & kHFSHasSecurityMask)) { + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } + } + + if (ATTR_CMN_FILEID & attr) { + vap->va_fileid = cap->ca_fileid; + VATTR_SET_SUPPORTED(vap, va_fileid); + } + + if (ATTR_CMN_PARENTID & attr) { + vap->va_parentid = cdp->cd_parentcnid; + VATTR_SET_SUPPORTED(vap, va_parentid); + } + + if (ATTR_CMN_ADDEDTIME & attr) { + if (cap->ca_recflags & kHFSHasDateAddedMask) { + vap->va_addedtime.tv_sec = hfs_get_dateadded_from_blob( + (const uint8_t *)cap->ca_finderinfo, cap->ca_mode); + vap->va_addedtime.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_addedtime); + } + } +} + +static void +vattr_data_for_dir_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * descp, + struct cat_attr * cattrp) +{ + attrgroup_t attr = alp->dirattr; + u_int32_t entries; + + /* + * The DIR_LINKCOUNT is the count of real directory hard links. + * (i.e. its not the sum of the implied "." and ".." references + * typically used in stat's st_nlink field) + */ + if (ATTR_DIR_LINKCOUNT & attr) { + vap->va_dirlinkcount = cattrp->ca_linkcount; + VATTR_SET_SUPPORTED(vap, va_dirlinkcount); + } + if (ATTR_DIR_ENTRYCOUNT & attr) { + entries = cattrp->ca_entries; + + if (descp->cd_parentcnid == kHFSRootParentID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->jnl || + ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && + (hfsmp->hfs_flags & HFS_READ_ONLY))) + entries -= 2; /* hide the journal files */ + } + + vap->va_nchildren = entries; + VATTR_SET_SUPPORTED(vap, va_nchildren); + } + + if (ATTR_DIR_MOUNTSTATUS & attr) { + /* + * There is not vnode_attr for mount point status. + * XXX. Should there be ? 
+ */ + u_int32_t mstatus = 0; + + if (vp != NULL && vnode_mountedhere(vp) != NULL) + mstatus = DIR_MNTSTATUS_MNTPOINT; + } +} + +static void +vattr_data_for_file_attrs(struct attrlist *alp, struct vnode_attr *vap, + struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, + struct cat_fork *rsrcfork, struct vnode *vp) +{ +#if !HFS_COMPRESSION +#pragma unused(vp) +#endif + attrgroup_t attr = alp->fileattr; + off_t da_size, rsrc_len, rsrc_alloc; + u_int32_t allocblksize; + + allocblksize = HFSTOVCB(hfsmp)->blockSize; + + off_t datasize = datafork->cf_size; + off_t totalsize = datasize + rsrcfork->cf_size; +#if HFS_COMPRESSION + int handle_compressed; + handle_compressed = (cattrp->ca_flags & UF_COMPRESSED);// && hfs_file_is_compressed(VTOC(vp), 1); + + if (handle_compressed) { + if (attr & (ATTR_FILE_DATALENGTH|ATTR_FILE_TOTALSIZE)) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(hfsmp, vp, cattrp->ca_fileid, &datasize, 1) ) { /* 1 == don't take the cnode lock */ + /* total size of a compressed file is just the data size */ + totalsize = datasize; + } + } + } +#endif + + if (ATTR_FILE_LINKCOUNT & attr) { + vap->va_nlink = cattrp->ca_linkcount; + VATTR_SET_SUPPORTED(vap, va_nlink); + } + if (ATTR_FILE_TOTALSIZE & attr) { + VATTR_RETURN(vap, va_total_size, totalsize); + } + if (ATTR_FILE_ALLOCSIZE & attr) { + VATTR_RETURN(vap, va_total_alloc, + (off_t)cattrp->ca_blocks * (off_t)allocblksize ); + } + if (ATTR_FILE_IOBLOCKSIZE & attr) { + VATTR_RETURN(vap, va_iosize, hfsmp->hfs_logBlockSize); + } + + /* ATTR_FILE_CLUMPSIZE is obsolete */ + + if (ATTR_FILE_DEVTYPE & attr) { + dev_t dev = 0; + + if (S_ISBLK(cattrp->ca_mode) || S_ISCHR(cattrp->ca_mode)) + dev = (u_int32_t)cattrp->ca_rdev; + + VATTR_RETURN(vap, va_rdev, dev); + } + + if (ATTR_FILE_DATALENGTH & attr) { + VATTR_RETURN(vap, va_data_size, datasize); + } +#if HFS_COMPRESSION + /* fake the data fork size on a decmpfs compressed file to reflect the + * uncompressed size. This ensures proper reading and copying of these + * files. + * NOTE: we may need to get the vnode here because the vnode parameter + * passed by hfs_vnop_readdirattr() may be null. + */ + + if (handle_compressed) { + da_size = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + rsrc_len = 0; + rsrc_alloc = 0; + } + else +#endif + { + da_size = (off_t)datafork->cf_blocks * (off_t)allocblksize; + rsrc_len = rsrcfork->cf_size; + rsrc_alloc = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + } + + if (ATTR_FILE_DATAALLOCSIZE & attr) { + VATTR_RETURN(vap, va_data_alloc, da_size); + } + + if (ATTR_FILE_RSRCLENGTH & attr) { + VATTR_RETURN(vap, va_rsrc_length, rsrc_len); + } + + if (ATTR_FILE_RSRCALLOCSIZE & attr) { + VATTR_RETURN(vap, va_rsrc_alloc, rsrc_alloc); + } +} diff --git a/core/hfs_attrlist.h b/core/hfs_attrlist.h new file mode 100644 index 0000000..b9ceb09 --- /dev/null +++ b/core/hfs_attrlist.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _HFS_ATTRLIST_H_ +#define _HFS_ATTRLIST_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include +#include + +#include "hfs_catalog.h" +#include "hfs_cnode.h" + + +struct attrblock { + struct attrlist * ab_attrlist; + void ** ab_attrbufpp; + void ** ab_varbufpp; + int ab_flags; + int ab_blocksize; + vfs_context_t ab_context; +}; + +/* + * The following define the attributes that HFS supports: + */ + +#define HFS_ATTR_CMN_VALID \ + (ATTR_CMN_NAME | ATTR_CMN_DEVID | \ + ATTR_CMN_FSID | ATTR_CMN_OBJTYPE | \ + ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | \ + ATTR_CMN_OBJPERMANENTID | ATTR_CMN_PAROBJID | \ + ATTR_CMN_SCRIPT | ATTR_CMN_CRTIME | \ + ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | \ + ATTR_CMN_ACCTIME | ATTR_CMN_BKUPTIME | \ + ATTR_CMN_FNDRINFO |ATTR_CMN_OWNERID | \ + ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | \ + ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS | \ + ATTR_CMN_FILEID | ATTR_CMN_PARENTID ) + +#define HFS_ATTR_CMN_SEARCH_VALID \ + (ATTR_CMN_NAME | ATTR_CMN_OBJID | \ + ATTR_CMN_PAROBJID | ATTR_CMN_CRTIME | \ + ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | \ + ATTR_CMN_ACCTIME | ATTR_CMN_BKUPTIME | \ + ATTR_CMN_FNDRINFO | ATTR_CMN_OWNERID | \ + ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | \ + ATTR_CMN_FILEID | ATTR_CMN_PARENTID ) + + + +#define HFS_ATTR_DIR_VALID \ + (ATTR_DIR_LINKCOUNT | ATTR_DIR_ENTRYCOUNT | ATTR_DIR_MOUNTSTATUS) + +#define HFS_ATTR_DIR_SEARCH_VALID \ + (ATTR_DIR_ENTRYCOUNT) + +#define HFS_ATTR_FILE_VALID \ + (ATTR_FILE_LINKCOUNT |ATTR_FILE_TOTALSIZE | \ + ATTR_FILE_ALLOCSIZE | ATTR_FILE_IOBLOCKSIZE | \ + ATTR_FILE_CLUMPSIZE | ATTR_FILE_DEVTYPE | \ + ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE | \ + ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE) + +#define HFS_ATTR_FILE_SEARCH_VALID \ + (ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE | \ + ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE ) + +extern int hfs_attrblksize(struct attrlist *attrlist); + +extern u_int32_t DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, + mode_t obj_mode, struct mount *mp, + kauth_cred_t cred, struct proc *p); + +extern void hfs_packattrblk(struct attrblock *abp, struct hfsmount *hfsmp, + struct vnode *vp, struct cat_desc *descp, struct cat_attr *attrp, + struct cat_fork *datafork, struct cat_fork *rsrcfork, struct vfs_context *ctx); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* ! _HFS_ATTRLIST_H_ */ diff --git a/core/hfs_btreeio.c b/core/hfs_btreeio.c new file mode 100644 index 0000000..ec2072e --- /dev/null +++ b/core/hfs_btreeio.c @@ -0,0 +1,948 @@ +/* + * Copyright (c) 2000-2017 Apple Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include + + +#include "hfs.h" +#include "hfs_cnode.h" +#include "hfs_dbg.h" +#include "hfs_endian.h" +#include "hfs_btreeio.h" + +#include "FileMgrInternal.h" +#include "BTreesPrivate.h" + +/* From bsd/vfs/vfs_bio.c */ +extern int bdwrite_internal(struct buf *, int); + +static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); +static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp); + +void btree_swap_node(struct buf *bp, __unused void *arg); + +/* + * Return btree node size for given vnode. + * + * Returns: + * For btree vnode, returns btree node size. + * For non-btree vnodes, returns 0. + */ +u_int16_t get_btree_nodesize(struct vnode *vp) +{ + BTreeControlBlockPtr btree; + u_int16_t node_size = 0; + + if (vnode_issystem(vp)) { + btree = (BTreeControlBlockPtr) VTOF(vp)->fcbBTCBPtr; + if (btree) { + node_size = btree->nodeSize; + } + } + + return node_size; +} + +OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount) +{ + BTreeControlBlockPtr bTreePtr; + + hfs_assert(vp != NULL); + hfs_assert(blockSize >= kMinNodeSize); + if (blockSize > MAXBSIZE ) + return (fsBTBadNodeSize); + + bTreePtr = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr; + bTreePtr->nodeSize = blockSize; + + return (E_NONE); +} + + +OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block) +{ + OSStatus retval = E_NONE; + struct buf *bp = NULL; + u_int8_t allow_empty_node; + + /* If the btree block is being read using hint, it is + * fine for the swap code to find zeroed out nodes. 
+ */ + if (options & kGetBlockHint) { + allow_empty_node = true; + } else { + allow_empty_node = false; + } + + if (options & kGetEmptyBlock) { + daddr64_t blkno; + off_t offset; + + offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize; + bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META); + if (bp && !hfs_vnop_blockmap(&(struct vnop_blockmap_args){ + .a_vp = vp, + .a_foffset = offset, + .a_size = block->blockSize, + .a_bpn = &blkno + })) { + buf_setblkno(bp, blkno); + } + } else { + retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp); + } + if (bp == NULL) + retval = -1; //XXX need better error + + if (retval == E_NONE) { + block->blockHeader = bp; + block->buffer = (char *)buf_dataptr(bp); + block->blockNum = buf_lblkno(bp); + block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */ + + // XXXdbg + block->isModified = 0; + + /* Check and endian swap B-Tree node (only if it's a valid block) */ + if (!(options & kGetEmptyBlock)) { + + /* This happens when we first open the b-tree, we might not have all the node data on hand */ + if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && + (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && + (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { + + /* + * Don't swap the node descriptor, record offsets, or other records. + * This record will be invalidated and re-read with the correct node + * size once the B-tree control block is set up with the node size + * from the header record. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); + + } else { + /* + * In this case, we have enough data in-hand to do basic validation + * on the B-Tree node. + */ + if (block->blockReadFromDisk) { + /* + * The node was just read from disk, so always swap/check it. + * This is necessary on big endian since the test below won't trigger. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else { + /* + * Block wasn't read from disk; it was found in the cache. + */ + if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { + /* + * The node was left in the cache in non-native order, so swap it. + * This only happens on little endian, after the node is written + * back to disk. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) { + /* + * The node was in-cache in native-endianness. We don't need to do + * anything here, because the node is ready to use. Set retval == 0. + */ + retval = 0; + } + /* + * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer, + * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are + * marked as unused in the b-tree map node would be OK and not have valid content. + */ + } + } + + /* + * If we got an error, then the node is only partially swapped. + * We mark the buffer invalid so that the next attempt to get the + * node will read it and attempt to swap again, and will notice + * the error again. If we didn't do this, the next attempt to get + * the node might use the partially swapped node as-is. 
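
The cache-order test above hinges on the fact that the first record in any node starts right after the 14-byte node descriptor, so the last two bytes of the node (the first record offset) read 0x000E in native order and 0x0E00 when big-endian bytes are viewed on a little-endian host. Below is a condensed, stand-alone restatement of that test as an editorial sketch; it is not part of the patch and the names are invented.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

enum btnode_order { BT_NODE_EMPTY_OR_UNKNOWN, BT_NODE_HOST_ORDER, BT_NODE_BIG_ENDIAN };

/* Classify a cached node by its trailing record offset, as GetBTreeBlock() does. */
static enum btnode_order
btnode_cache_order(const void *node, size_t node_size)
{
    uint16_t first_record_offset;

    memcpy(&first_record_offset,
           (const uint8_t *)node + node_size - sizeof(first_record_offset),
           sizeof(first_record_offset));

    if (first_record_offset == 0x000E)   /* sizeof(BTNodeDescriptor), native order */
        return BT_NODE_HOST_ORDER;
    if (first_record_offset == 0x0E00)   /* big-endian bytes on a little-endian host */
        return BT_NODE_BIG_ENDIAN;
    return BT_NODE_EMPTY_OR_UNKNOWN;     /* e.g. a zeroed, unused map-node block */
}
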
+ */ + if (retval) + buf_markinvalid(bp); + } + } + + if (retval) { + if (bp) + buf_brelse(bp); + block->blockHeader = NULL; + block->buffer = NULL; + } + + return (retval); +} + + +void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) +{ + struct hfsmount *hfsmp = VTOHFS(vp); + struct buf *bp = NULL; + + if (hfsmp->jnl == NULL) { + return; + } + + bp = (struct buf *) blockPtr->blockHeader; + if (bp == NULL) { + panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); + return; + } + + journal_modify_block_start(hfsmp->jnl, bp); + blockPtr->isModified = 1; +} + +void +btree_swap_node(struct buf *bp, __unused void *arg) +{ + // struct hfsmount *hfsmp = (struct hfsmount *)arg; + int retval; + struct vnode *vp = buf_vnode(bp); + BlockDescriptor block; + + /* Prepare the block pointer */ + block.blockHeader = bp; + block.buffer = (char *)buf_dataptr(bp); + block.blockNum = buf_lblkno(bp); + /* not found in cache ==> came from disk */ + block.blockReadFromDisk = (buf_fromcache(bp) == 0); + block.blockSize = buf_count(bp); + + /* Swap the data now that this node is ready to go to disk. + * We allow swapping of zeroed out nodes here because we might + * be writing node whose last record just got deleted. + */ + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); + if (retval) + panic("hfs: btree_swap_node: about to write corrupt node!\n"); +} + + +static int +btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) +{ + return journal_modify_block_end(hfsmp->jnl, bp, btree_swap_node, hfsmp); +} + + +OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options) +{ + struct hfsmount *hfsmp = VTOHFS(vp); + OSStatus retval = E_NONE; + struct buf *bp = NULL; + + bp = (struct buf *) blockPtr->blockHeader; + + if (bp == NULL) { + retval = -1; + goto exit; + } + + if (options & kTrashBlock) { + buf_markinvalid(bp); + + if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) { + journal_kill_block(hfsmp->jnl, bp); + } else { + buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ + } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + + } else { + if (options & kForceWriteBlock) { + if (hfsmp->jnl) { + if (blockPtr->isModified == 0) { + panic("hfs: releaseblock: modified is 0 but forcewrite set! bp %p\n", bp); + } + + retval = btree_journal_modify_block_end(hfsmp, bp); + blockPtr->isModified = 0; + } else { + retval = VNOP_BWRITE(bp); + } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + + } else if (options & kMarkBlockDirty) { + struct timeval tv; + microuptime(&tv); + if ((options & kLockTransaction) && hfsmp->jnl == NULL) { + /* + * + * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move + * the buffer onto the LOCKED free list. This is necessary, otherwise + * getnewbuf() would try to reclaim the buffers using buf_bawrite, which + * isn't going to work. + * + */ + /* Don't hog all the buffers... */ + if (count_lock_queue() > kMaxLockedMetaBuffers) { + hfs_btsync(vp, HFS_SYNCTRANS); + /* Rollback sync time to cause a sync on lock release... */ + (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1)); + } + buf_setflags(bp, B_LOCKED); + } + + /* + * Delay-write this block. + * If the maximum delayed buffers has been exceeded then + * free up some buffers and fall back to an asynchronous write. 
+ */ + if (hfsmp->jnl) { + if (blockPtr->isModified == 0) { + panic("hfs: releaseblock: modified is 0 but markdirty set! bp %p\n", bp); + } + retval = btree_journal_modify_block_end(hfsmp, bp); + blockPtr->isModified = 0; + } else if (bdwrite_internal(bp, 1) != 0) { + hfs_btsync(vp, 0); + /* Rollback sync time to cause a sync on lock release... */ + (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1)); + + buf_clearflags(bp, B_LOCKED); + buf_bawrite(bp); + } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + + } else { + // check if we had previously called journal_modify_block_start() + // on this block and if so, abort it (which will call buf_brelse()). + if (hfsmp->jnl && blockPtr->isModified) { + // XXXdbg - I don't want to call modify_block_abort() + // because I think it may be screwing up the + // journal and blowing away a block that has + // valid data in it. + // + // journal_modify_block_abort(hfsmp->jnl, bp); + //panic("hfs: releaseblock called for 0x%x but mod_block_start previously called.\n", bp); + btree_journal_modify_block_end(hfsmp, bp); + blockPtr->isModified = 0; + } else { + buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ + } + + /* Don't let anyone else try to use this bp, it's been consumed */ + blockPtr->blockHeader = NULL; + } + } + +exit: + return (retval); +} + + +OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) +{ +#pragma unused (maxEOF) + + OSStatus retval = 0, ret = 0; + int64_t actualBytesAdded, origSize; + u_int64_t bytesToAdd; + u_int32_t startAllocation; + u_int32_t fileblocks; + BTreeInfoRec btInfo; + ExtendedVCB *vcb; + FCB *filePtr; + struct proc *p = NULL; + int64_t trim = 0; + int lockflags = 0; + + filePtr = GetFileControlBlock(vp); + + if ( (off_t)minEOF > filePtr->fcbEOF ) + { + bytesToAdd = minEOF - filePtr->fcbEOF; + + if (bytesToAdd < filePtr->ff_clumpsize) + bytesToAdd = filePtr->ff_clumpsize; //XXX why not always be a mutiple of clump size? + } + else + { + return -1; + } + + vcb = VTOVCB(vp); + + /* + * The Extents B-tree can't have overflow extents. ExtendFileC will + * return an error if an attempt is made to extend the Extents B-tree + * when the resident extents are exhausted. + */ + + /* Protect allocation bitmap and extents overflow file. */ + lockflags = SFL_BITMAP; + if (VTOC(vp)->c_fileid != kHFSExtentsFileID) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK); + + (void) BTGetInformation(filePtr, 0, &btInfo); + +#if 0 // XXXdbg + /* + * The b-tree code expects nodes to be contiguous. So when + * the allocation block size is less than the b-tree node + * size, we need to force disk allocations to be contiguous. + */ + if (vcb->blockSize >= btInfo.nodeSize) { + extendFlags = 0; + } else { + /* Ensure that all b-tree nodes are contiguous on disk */ + extendFlags = kEFContigMask; + } +#endif + + origSize = filePtr->fcbEOF; + fileblocks = filePtr->ff_blocks; + startAllocation = vcb->nextAllocation; + + // loop trying to get a contiguous chunk that's an integer multiple + // of the btree node size. if we can't get a contiguous chunk that + // is at least the node size then we break out of the loop and let + // the error propagate back up. 
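
The comment above describes the retry policy of the allocation loop that follows: on dskFulErr the request is halved, then rounded down so it stays an integer multiple of the node size, and the loop gives up once even a single node no longer fits. A condensed editorial sketch of that step (the helper name is invented and is not part of the patch):

#include <stdint.h>

/* Compute the next, smaller extension request after a disk-full failure. */
static uint64_t
next_extend_request(uint64_t bytes_to_add, uint32_t node_size)
{
    bytes_to_add >>= 1;                         /* halve the request */
    if (bytes_to_add < node_size)
        return 0;                               /* too small: caller gives up */
    bytes_to_add -= bytes_to_add % node_size;   /* keep it a node-size multiple */
    return bytes_to_add;
}
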
+ while((off_t)bytesToAdd >= btInfo.nodeSize) { + do { + retval = ExtendFileC(vcb, filePtr, bytesToAdd, 0, + kEFContigMask | kEFMetadataMask | kEFNoClumpMask, + (int64_t *)&actualBytesAdded); + if (retval == dskFulErr && actualBytesAdded == 0) { + bytesToAdd >>= 1; + if (bytesToAdd < btInfo.nodeSize) { + break; + } else if ((bytesToAdd % btInfo.nodeSize) != 0) { + // make sure it's an integer multiple of the nodeSize + bytesToAdd -= (bytesToAdd % btInfo.nodeSize); + } + } + } while (retval == dskFulErr && actualBytesAdded == 0); + + if (retval == dskFulErr && actualBytesAdded == 0 && bytesToAdd <= btInfo.nodeSize) { + break; + } + + filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; + bytesToAdd = minEOF - filePtr->fcbEOF; + } + + /* + * If a new extent was added then move the roving allocator + * reference forward by the current b-tree file size so + * there's plenty of room to grow. + */ + if ((retval == 0) && + ((VCBTOHFS(vcb)->hfs_flags & HFS_METADATA_ZONE) == 0) && + (vcb->nextAllocation > startAllocation) && + ((vcb->nextAllocation + fileblocks) < vcb->allocLimit)) { + HFS_UPDATE_NEXT_ALLOCATION(vcb, vcb->nextAllocation + fileblocks); + } + + filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; + + // XXXdbg ExtendFileC() could have returned an error even though + // it grew the file to be big enough for our needs. If this is + // the case, we don't care about retval so we blow it away. + // + if (filePtr->fcbEOF >= (off_t)minEOF && retval != 0) { + retval = 0; + } + + // XXXdbg if the file grew but isn't large enough or isn't an + // even multiple of the nodeSize then trim things back. if + // the file isn't large enough we trim back to the original + // size. otherwise we trim back to be an even multiple of the + // btree node size. + // + if ((filePtr->fcbEOF < (off_t)minEOF) || ((filePtr->fcbEOF - origSize) % btInfo.nodeSize) != 0) { + + if (filePtr->fcbEOF < (off_t)minEOF) { + retval = dskFulErr; + + if (filePtr->fcbEOF < origSize) { + panic("hfs: btree file eof %lld less than orig size %lld!\n", + filePtr->fcbEOF, origSize); + } + + trim = filePtr->fcbEOF - origSize; + } else { + trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize); + } + + ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0); + filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize; + + // XXXdbg - panic if the file didn't get trimmed back properly + if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) { + panic("hfs: truncate file didn't! fcbEOF %lld nsize %d fcb %p\n", + filePtr->fcbEOF, btInfo.nodeSize, filePtr); + } + + if (ret) { + // XXXdbg - this probably doesn't need to be a panic() + panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n", + filePtr->fcbEOF, trim, (long)ret); + goto out; + } + } + + if(VTOC(vp)->c_fileid != kHFSExtentsFileID) { + /* + * Get any extents overflow b-tree changes to disk ASAP! 
+ */ + (void) BTFlushPath(VTOF(vcb->extentsRefNum)); + (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p); + } + hfs_systemfile_unlock(vcb, lockflags); + lockflags = 0; + + if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) { + panic("hfs: extendbtree: fcb %p has eof 0x%llx not a multiple of 0x%x (trim %llx)\n", + filePtr, filePtr->fcbEOF, btInfo.nodeSize, trim); + } + + /* + * Update the Alternate MDB or Alternate VolumeHeader + */ + VTOC(vp)->c_flag |= C_MODIFIED; + if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || + (VTOC(vp)->c_fileid == kHFSCatalogFileID) || + (VTOC(vp)->c_fileid == kHFSAttributesFileID) + ) { + MarkVCBDirty( vcb ); + ret = hfs_flushvolumeheader(VCBTOHFS(vcb), HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + } else { + VTOC(vp)->c_touch_chgtime = TRUE; + VTOC(vp)->c_touch_modtime = TRUE; + (void) hfs_update(vp, 0); + } + + ret = ClearBTNodes(vp, btInfo.nodeSize, origSize, (filePtr->fcbEOF - origSize)); +out: + if (retval == 0) + retval = ret; + + if (lockflags) + hfs_systemfile_unlock(vcb, lockflags); + + return retval; +} + + +/* + * Clear out (zero) new b-tree nodes on disk. + */ +static int +ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) +{ + struct hfsmount *hfsmp = VTOHFS(vp); + struct buf *bp = NULL; + daddr64_t blk; + daddr64_t blkcnt; + + blk = offset / blksize; + blkcnt = amount / blksize; + + while (blkcnt > 0) { + bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META); + if (bp == NULL) + continue; + + // XXXdbg + if (hfsmp->jnl) { + // XXXdbg -- skipping this for now since it makes a transaction + // become *way* too large + //journal_modify_block_start(hfsmp->jnl, bp); + } + bzero((char *)buf_dataptr(bp), blksize); + + buf_markaged(bp); + + // XXXdbg + if (hfsmp->jnl) { + // XXXdbg -- skipping this for now since it makes a transaction + // become *way* too large + //journal_modify_block_end(hfsmp->jnl, bp); + + // XXXdbg - remove this once we decide what to do with the + // writes to the journal + if ((blk % 32) == 0) + VNOP_BWRITE(bp); + else + buf_bawrite(bp); + } else { + /* wait/yield every 32 blocks so we don't hog all the buffers */ + if ((blk % 32) == 0) + VNOP_BWRITE(bp); + else + buf_bawrite(bp); + } + --blkcnt; + ++blk; + } + + return (0); +} + + +extern char hfs_attrname[]; + +/* + * Create an HFS+ Attribute B-tree File. + * + * No global resources should be held. + */ +int +hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt) +{ + struct vnode* vp = NULLVP; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork cfork; + BlockDescriptor blkdesc; + BTNodeDescriptor *ndp; + BTHeaderRec *bthp; + BTreeControlBlockPtr btcb = NULL; + struct buf *bp = NULL; + void * buffer; + u_int8_t *bitmap; + u_int16_t *index; + u_int32_t node_num, num_map_nodes; + u_int32_t bytes_per_map_record; + u_int32_t temp; + u_int16_t offset; + int intrans = 0; + int result; + int newvnode_flags = 0; + +again: + /* + * Serialize creation using HFS_CREATING_BTREE flag. + */ + hfs_lock_mount (hfsmp); + if (hfsmp->hfs_flags & HFS_CREATING_BTREE) { + /* Someone else beat us, wait for them to finish. */ + (void) msleep(&hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, + PDROP | PINOD, "hfs_create_attr_btree", 0); + if (hfsmp->hfs_attribute_vp) { + return (0); + } + goto again; + } + hfsmp->hfs_flags |= HFS_CREATING_BTREE; + hfs_unlock_mount (hfsmp); + + /* Check if were out of usable disk space. 
*/ + if ((hfs_freeblks(hfsmp, 1) == 0)) { + result = ENOSPC; + goto exit; + } + + /* + * Set up Attribute B-tree vnode + * (this must be done before we start a transaction + * or take any system file locks) + */ + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; + cndesc.cd_nameptr = (const u_int8_t *)hfs_attrname; + cndesc.cd_namelen = strlen(hfs_attrname); + cndesc.cd_cnid = kHFSAttributesFileID; + + bzero(&cnattr, sizeof(cnattr)); + cnattr.ca_linkcount = 1; + cnattr.ca_mode = S_IFREG; + cnattr.ca_fileid = cndesc.cd_cnid; + + bzero(&cfork, sizeof(cfork)); + cfork.cf_clump = nodesize * nodecnt; + + result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, + &cfork, &vp, &newvnode_flags); + if (result) { + goto exit; + } + /* + * Set up Attribute B-tree control block + */ + btcb = hfs_mallocz(sizeof(*btcb)); + + btcb->nodeSize = nodesize; + btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength; + btcb->btreeType = 0xFF; + btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask; + btcb->version = kBTreeVersion; + btcb->writeCount = 1; + btcb->flags = 0; /* kBTHeaderDirty */ + btcb->fileRefNum = vp; + btcb->getBlockProc = GetBTreeBlock; + btcb->releaseBlockProc = ReleaseBTreeBlock; + btcb->setEndOfForkProc = ExtendBTreeFile; + btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare; + + /* + * NOTE: We must make sure to zero out this pointer if we error out in this function! + * If we don't, then unmount will treat it as a valid pointer which can lead to a + * use-after-free + */ + VTOF(vp)->fcbBTCBPtr = btcb; + + /* + * Allocate some space + */ + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + intrans = 1; + + /* Note ExtendBTreeFile will acquire the necessary system file locks. */ + result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump); + if (result) + goto exit; + + btcb->totalNodes = VTOF(vp)->ff_size / nodesize; + + /* + * Figure out how many map nodes we'll need. + * + * bytes_per_map_record = the number of bytes in the map record of a + * map node. Since that is the only record in the node, it is the size + * of the node minus the node descriptor at the start, and two record + * offsets at the end of the node. The "- 2" is to round the size down + * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a + * multiple of 4). + * + * The value "temp" here is the number of *bits* in the map record of + * the header node. 
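
As a worked check of the sizing arithmetic below (editorial; it assumes the usual on-disk sizes sizeof(BTNodeDescriptor) == 14, sizeof(BTHeaderRec) == 106 and kBTreeHeaderUserBytes == 128), take an 8192-byte node size:

    bytes_per_map_record = 8192 - 14 - 2*2 - 2             = 8172 bytes
    header map record    = 8 * (8192 - 14 - 106 - 128 - 8) = 63488 bits

so the header node's own map record covers 63,488 nodes, and a tree created with 100,000 total nodes would need howmany(100000 - 63488, 8172 * 8) = 1 additional map node.
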
+ */ + bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2; + temp = 8 * (nodesize - sizeof(BTNodeDescriptor) + - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes + - 4 * sizeof(u_int16_t)); + if (btcb->totalNodes > temp) { + num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8); + } + else { + num_map_nodes = 0; + } + + btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes; + + /* + * Initialize the b-tree header on disk + */ + bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + if (buf_size(bp) != nodesize) + panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp)); + + bzero(buffer, nodesize); + index = (u_int16_t *)buffer; + + /* FILL IN THE NODE DESCRIPTOR: */ + ndp = (BTNodeDescriptor *)buffer; + if (num_map_nodes != 0) + ndp->fLink = 1; + ndp->kind = kBTHeaderNode; + ndp->numRecords = 3; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + /* FILL IN THE HEADER RECORD: */ + bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset); + bthp->nodeSize = nodesize; + bthp->totalNodes = btcb->totalNodes; + bthp->freeNodes = btcb->freeNodes; + bthp->clumpSize = cfork.cf_clump; + bthp->btreeType = 0xFF; + bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask; + bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength; + bthp->keyCompareType = kHFSBinaryCompare; + offset += sizeof(BTHeaderRec); + index[(nodesize / 2) - 2] = offset; + + /* FILL IN THE USER RECORD: */ + offset += kBTreeHeaderUserBytes; + index[(nodesize / 2) - 3] = offset; + + /* Mark the header node and map nodes in use in the map record. + * + * NOTE: Assumes that the header node's map record has at least + * (num_map_nodes + 1) bits. + */ + bitmap = (u_int8_t *) buffer + offset; + temp = num_map_nodes + 1; /* +1 for the header node */ + while (temp >= 8) { + *(bitmap++) = 0xFF; + temp -= 8; + } + *bitmap = ~(0xFF >> temp); + + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); + index[(nodesize / 2) - 4] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + + /* Create the map nodes: node numbers 1 .. 
num_map_nodes */ + for (node_num=1; node_num <= num_map_nodes; ++node_num) { + bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + bzero(buffer, nodesize); + index = (u_int16_t *)buffer; + + /* Fill in the node descriptor */ + ndp = (BTNodeDescriptor *)buffer; + if (node_num != num_map_nodes) + ndp->fLink = node_num + 1; + ndp->kind = kBTMapNode; + ndp->numRecords = 1; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + + /* Fill in the map record's offset */ + /* Note: We assume that the map record is all zeroes */ + offset = sizeof(BTNodeDescriptor) + bytes_per_map_record; + index[(nodesize / 2) - 2] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + } + + /* Update vp/cp for attribute btree */ + hfs_lock_mount (hfsmp); + hfsmp->hfs_attribute_cp = VTOC(vp); + hfsmp->hfs_attribute_vp = vp; + hfs_unlock_mount (hfsmp); + + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + + if (intrans) { + hfs_end_transaction(hfsmp); + intrans = 0; + } + + /* Initialize the vnode for virtual attribute data file */ + result = init_attrdata_vnode(hfsmp); + if (result) { + printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result); + } + +exit: + + if (vp && result) { + /* + * If we're about to error out, then make sure to zero out the B-Tree control block pointer + * from the filefork of the EA B-Tree cnode/vnode. Failing to do this will lead to a use + * after free at unmount or BTFlushPath. Since we're about to error out anyway, this memory + * will be freed. + */ + VTOF(vp)->fcbBTCBPtr = NULL; + } + + + if (vp) { + hfs_unlock(VTOC(vp)); + } + if (result) { + hfs_free(btcb, sizeof(*btcb)); + if (vp) { + vnode_put(vp); + } + /* XXX need to give back blocks ? */ + } + if (intrans) { + hfs_end_transaction(hfsmp); + } + + /* + * All done, clear HFS_CREATING_BTREE, and wake up any sleepers. + */ + hfs_lock_mount (hfsmp); + hfsmp->hfs_flags &= ~HFS_CREATING_BTREE; + wakeup((caddr_t)&hfsmp->hfs_attribute_cp); + hfs_unlock_mount (hfsmp); + + return (result); +} + diff --git a/core/hfs_btreeio.h b/core/hfs_btreeio.h new file mode 100644 index 0000000..740734d --- /dev/null +++ b/core/hfs_btreeio.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2005-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _HFS_BTREEIO_H_ +#define _HFS_BTREEIO_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include "hfs.h" +#include "BTreesInternal.h" + +/* BTree accessor routines */ +extern OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, + ItemCount minBlockCount); + +extern OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, + GetBlockOptions options, BlockDescriptor *block); + +extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, + ReleaseBlockOptions options); + +extern OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF); + +extern void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr); + +int hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t nodecnt); + +u_int16_t get_btree_nodesize(struct vnode *vp); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* ! _HFS_BTREEIO_H_ */ diff --git a/core/hfs_catalog.c b/core/hfs_catalog.c new file mode 100644 index 0000000..d48a106 --- /dev/null +++ b/core/hfs_catalog.c @@ -0,0 +1,4813 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_format.h" +#include "hfs_endian.h" + +#include "BTreesInternal.h" +#include "BTreesPrivate.h" +#include "HFSUnicodeWrappers.h" + + +/* + * Initialization of an FSBufferDescriptor structure. 
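
The BDINIT() macro defined just below is the standard way this file hands a record buffer to the B-tree layer. A minimal usage sketch follows (editorial, not part of the patch; it assumes the hfs headers already included in this file, and the search call is abbreviated):

CatalogRecord rec;
FSBufferDescriptor btdata;
u_int16_t reclen;

BDINIT(btdata, &rec);   /* bufferAddress = &rec, itemSize = sizeof(rec), itemCount = 1 */
/* result = BTSearchRecord(fcb, iterator, &btdata, &reclen, iterator); */
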
+ */ +#define BDINIT(bd, addr) { \ + (bd).bufferAddress = (addr); \ + (bd).itemSize = sizeof(*(addr)); \ + (bd).itemCount = 1; \ +} + + +struct btobj { + BTreeIterator iterator; + HFSPlusCatalogKey key; + CatalogRecord data; +}; + +struct update_state { + struct cat_desc * s_desc; + struct cat_attr * s_attr; + const struct cat_fork * s_datafork; + const struct cat_fork * s_rsrcfork; + struct hfsmount * s_hfsmp; +}; + +struct position_state { + int error; + u_int32_t count; + u_int32_t index; + u_int32_t parentID; + struct hfsmount *hfsmp; +}; + +/* Map file mode type to directory entry types */ +u_char modetodirtype[16] = { + DT_REG, DT_FIFO, DT_CHR, DT_UNKNOWN, + DT_DIR, DT_UNKNOWN, DT_BLK, DT_UNKNOWN, + DT_REG, DT_UNKNOWN, DT_LNK, DT_UNKNOWN, + DT_SOCK, DT_UNKNOWN, DT_WHT, DT_UNKNOWN +}; +#define MODE_TO_DT(mode) (modetodirtype[((mode) & S_IFMT) >> 12]) + + +#define HFS_LOOKUP_SYSFILE 0x1 /* If set, allow lookup of system files */ +#define HFS_LOOKUP_HARDLINK 0x2 /* If set, allow lookup of hard link records and not resolve the hard links */ +#define HFS_LOOKUP_CASESENSITIVE 0x4 /* If set, verify results of a file/directory record match input case */ +static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t hint, int wantrsrc, + struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid); + +int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, + struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp); + +/* Internal catalog support routines */ + +static int cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, + struct position_state *state); + +static int resolvelinkid(struct hfsmount *hfsmp, u_int32_t linkref, ino_t *ino); + +static int getkey(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key); + +static int buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, + HFSPlusCatalogKey *key, int retry); + +static void buildthreadkey(HFSCatalogNodeID parentID, int std_hfs, CatalogKey *key); + +static void buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding, CatalogRecord *crp, u_int32_t *recordSize); + +static int catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *state); + +static int builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_int32_t hint, u_int32_t encoding, + int isdir, struct cat_desc *descp); + +static void getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct cat_attr * attrp); + +#if CONFIG_HFS_STD +static void promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, HFSPlusCatalogKey *keyp, u_int32_t *encoding); +static void promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *file, int resource, struct cat_fork * forkp); +static void promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlusCatalogFile *crp); +#endif + +static cnid_t getcnid(const CatalogRecord *crp); +static u_int32_t getencoding(const CatalogRecord *crp); +static cnid_t getparentcnid(const CatalogRecord *recp); + +static int isadir(const CatalogRecord *crp); + +static int buildthread(void *keyp, void *recp, int std_hfs, int directory); + +static int cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalogFile *crp); + +static int cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp, + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp); + + + +/* HFS ID 
Hashtable Functions */ +#define IDHASH(hfsmp, inum) (&hfsmp->hfs_idhashtbl[(inum) & hfsmp->hfs_idhash]) + +/* Initialize the HFS ID hash table */ +void +hfs_idhash_init (struct hfsmount *hfsmp) { + /* secured by catalog lock so no lock init needed */ + hfsmp->hfs_idhashtbl = hashinit(HFS_IDHASH_DEFAULT, M_TEMP, &hfsmp->hfs_idhash); +} + +/* Free the HFS ID hash table */ +void +hfs_idhash_destroy (struct hfsmount *hfsmp) { + /* during failed mounts & unmounts */ + FREE(hfsmp->hfs_idhashtbl, M_TEMP); +} + +/* +from hfs_catalog.h: +typedef struct cat_preflightid { + cnid_t fileid; + LIST_ENTRY(cat_preflightid) id_hash; +} cat_preflightid_t; + +from hfs.h: + u_long hfs_idhash; / size of cnid/fileid hash table -1 / + LIST_HEAD(idhashhead, cat_preflightid) *hfs_idhashtbl; / base of ID hash / +*/ + +/* + * Check the run-time ID hashtable. + * + * The catalog lock must be held (like other functions in this file). + * + * Returns: + * 1 if the ID is in the hash table. + * 0 if the ID is not in the hash table + */ +int cat_check_idhash (struct hfsmount *hfsmp, cnid_t test_fileid) { + + cat_preflightid_t *preflight; + int found = 0; + + for (preflight = IDHASH(hfsmp, test_fileid)->lh_first; preflight ; preflight = preflight->id_hash.le_next) { + if (preflight->fileid == test_fileid) { + found = 1; + break; + } + } + + return found; +} + +/* Insert the supplied preflight into the ID hash table */ +int cat_insert_idhash (struct hfsmount *hfsmp, cat_preflightid_t *preflight) { + + if (preflight) { + LIST_INSERT_HEAD(IDHASH(hfsmp, (preflight->fileid)), preflight, id_hash); + return 0; + } + return -1; +} + + +/* Remove the data structure with the specified ID from the hashtable */ +int cat_remove_idhash (cat_preflightid_t *preflight) { + + if ((preflight) && ((preflight->id_hash.le_next || preflight->id_hash.le_prev))) { + LIST_REMOVE (preflight, id_hash); + preflight->id_hash.le_next = NULL; + preflight->id_hash.le_prev = NULL; + + return 0; + } + + return -1; +} + +/* + * Acquire a new CNID for use. + * + * This is slightly more complicated than just pulling the value from the + * hfsmount data structure. We need to validate that the ID is not in-use + * even if we've not wrapped around and that there are not any lingering + * or orphaned fileIDs for this ID. + * + * Also validate that there are not any pending insertions into the + * catalog by checking the ID hash table. + */ +int +cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid) +{ + uint32_t nextCNID; + struct BTreeIterator *iterator; + FSBufferDescriptor btdata; + uint16_t datasize; + CatalogRecord *recp; + int result = 0; + int std_hfs; + int wrapped = 0; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + /* + * Get the next CNID. We can change it since we hold the catalog lock. + */ +nextid: + nextCNID = hfsmp->vcbNxtCNID; + if (nextCNID == 0xFFFFFFFF) { + if (std_hfs) { + return (ENOSPC); + } else { + wrapped++; + if (wrapped > 1) { + /* don't allow more than one wrap-around */ + return ENOSPC; + } + hfs_lock_mount (hfsmp); + hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; + hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; + hfs_unlock_mount (hfsmp); + } + } else { + hfsmp->vcbNxtCNID++; + } + hfs_note_header_minor_change(hfsmp); + + /* First check that there are not any entries pending in the hash table with this ID */ + if (cat_check_idhash (hfsmp, nextCNID)) { + /* Someone wants to insert this into the catalog but hasn't done so yet. 
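
Taken together, the hash helpers above and cat_acquire_cnid() here implement a reservation protocol: acquire an ID that is provably unused, publish it in the ID hash so concurrent creators skip it, then retire the entry once the catalog record has actually been inserted. A condensed usage sketch (editorial, not part of the patch; error handling is elided, the function name is invented, the catalog lock is assumed held, and the types come from the hfs headers this file already includes):

static int
reserve_and_publish_cnid(struct hfsmount *hfsmp, cat_preflightid_t *preflight)
{
    cnid_t new_cnid;
    int error;

    error = cat_acquire_cnid(hfsmp, &new_cnid);   /* vend an ID with no thread record,
                                                     orphaned EAs, or stale cnode */
    if (error)
        return error;

    preflight->fileid = new_cnid;
    (void) cat_insert_idhash(hfsmp, preflight);   /* make the pending ID visible */

    /* ... caller inserts the catalog record under the same lock ... */

    (void) cat_remove_idhash(preflight);          /* retire the reservation */
    return 0;
}
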
Skip it */ + goto nextid; + } + + /* Check to see if a thread record exists for the target ID we just got */ + iterator = hfs_mallocz(sizeof(*iterator)); + buildthreadkey(nextCNID, std_hfs, (CatalogKey *)&iterator->key); + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + result = BTSearchRecord(hfsmp->hfs_catalog_cp->c_datafork, iterator, &btdata, &datasize, iterator); + hfs_free(recp, sizeof(CatalogRecord)); + hfs_free(iterator, sizeof(*iterator)); + + if (result == btNotFound) { + /* Good. File ID was not in use. Move on to checking EA B-Tree */ + result = file_attribute_exist (hfsmp, nextCNID); + if (result == EEXIST) { + /* This CNID has orphaned EAs. Skip it and move on to the next one */ + result = 0; + goto nextid; + } + if (result) { + /* For any other error, return the result */ + return result; + } + + /* + * Now validate that there are no lingering cnodes with this ID. If a cnode + * has been removed on-disk (marked C_NOEXISTS), but has not yet been reclaimed, + * then it will still have an entry in the cnode hash table. This means that + * a subsequent lookup will find THAT entry and believe this one has been deleted + * prematurely. If there is a lingering cnode, then just skip this entry and move on. + * + * Note that we pass (existence_only == 1) argument to hfs_chash_snoop. + */ + if (!std_hfs && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { + if (hfs_chash_snoop (hfsmp, nextCNID, 1, NULL, NULL) == 0) { + goto nextid; + } + } + + /* + * If we get here, then we didn't see any thread records, orphaned EAs, + * or stale cnodes. This ID is safe to vend out. + */ + *new_cnid = nextCNID; + } + else if (result == noErr) { + /* move on to the next ID */ + goto nextid; + } + else { + /* For any other situation, just bail out */ + return EIO; + } + + return 0; + +} + +int +cat_preflight(struct hfsmount *hfsmp, catops_t ops, cat_cookie_t *cookie, __unused proc_t p) +{ + int lockflags = 0; + int result; + + if (hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + result = BTReserveSpace(hfsmp->hfs_catalog_cp->c_datafork, ops, (void*)cookie); + + if (lockflags) + hfs_systemfile_unlock(hfsmp, lockflags); + + return MacToVFSError(result); +} + +void +cat_postflight(struct hfsmount *hfsmp, cat_cookie_t *cookie, __unused proc_t p) +{ + int lockflags = 0; + + if (hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + (void) BTReleaseReserve(hfsmp->hfs_catalog_cp->c_datafork, (void*)cookie); + + if (lockflags) + hfs_systemfile_unlock(hfsmp, lockflags); +} + +void +cat_convertattr( + struct hfsmount *hfsmp, + CatalogRecord * recp, + struct cat_attr *attrp, + struct cat_fork *datafp, + struct cat_fork *rsrcfp) +{ + int std_hfs = HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord; + + if (std_hfs == 0) { + getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp); + } +#if CONFIG_HFS_STD + else { + struct HFSPlusCatalogFile cnoderec; + + promoteattr(hfsmp, recp, &cnoderec); + getbsdattr(hfsmp, &cnoderec, attrp); + } +#endif + + if (isadir(recp)) { + bzero(datafp, sizeof(*datafp)); + } +#if CONFIG_HFS_STD + else if (std_hfs) { + promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, 0, datafp); + promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, 1, rsrcfp); + } +#endif + else { + /* Convert the data fork. 
*/ + datafp->cf_size = recp->hfsPlusFile.dataFork.logicalSize; + datafp->cf_new_size = 0; + datafp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks; + if ((hfsmp->hfc_stage == HFC_RECORDING) && + (attrp->ca_atime >= hfsmp->hfc_timebase)) { + datafp->cf_bytesread = + recp->hfsPlusFile.dataFork.clumpSize * + HFSTOVCB(hfsmp)->blockSize; + } else { + datafp->cf_bytesread = 0; + } + datafp->cf_vblocks = 0; + bcopy(&recp->hfsPlusFile.dataFork.extents[0], + &datafp->cf_extents[0], sizeof(HFSPlusExtentRecord)); + + /* Convert the resource fork. */ + rsrcfp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize; + rsrcfp->cf_new_size = 0; + rsrcfp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks; + if ((hfsmp->hfc_stage == HFC_RECORDING) && + (attrp->ca_atime >= hfsmp->hfc_timebase)) { + datafp->cf_bytesread = + recp->hfsPlusFile.resourceFork.clumpSize * + HFSTOVCB(hfsmp)->blockSize; + } else { + datafp->cf_bytesread = 0; + } + rsrcfp->cf_vblocks = 0; + bcopy(&recp->hfsPlusFile.resourceFork.extents[0], + &rsrcfp->cf_extents[0], sizeof(HFSPlusExtentRecord)); + } +} + +/* + * Convert a raw catalog key and record into an in-core catalog descriptor. + * + * Note: The caller is responsible for releasing the catalog descriptor. + */ +int +cat_convertkey( + struct hfsmount *hfsmp, + CatalogKey *key, + CatalogRecord * recp, + struct cat_desc *descp) +{ + int std_hfs = HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord; + HFSPlusCatalogKey * pluskey = NULL; + u_int32_t encoding; + cnid_t cnid = 0; + int err = 0; + + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)key; + encoding = getencoding(recp); + } +#if CONFIG_HFS_STD + else { + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, (HFSCatalogKey *)key, pluskey, &encoding); + } +#endif + + /* Get the CNID before calling builddesc. Need to error check it. */ + cnid = getcnid(recp); + if (cnid == 0) { + /* If ths CNID == 0, it's invalid. Mark as corrupt */ + hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); + err = EINVAL; + } + else { + builddesc(pluskey, cnid, 0, encoding, isadir(recp), descp); + } + +#if CONFIG_HFS_STD + if (std_hfs) { + hfs_free(pluskey, sizeof(*pluskey)); + } +#endif + + return err; +} + + +/* + * cat_releasedesc + */ +void +cat_releasedesc(struct cat_desc *descp) +{ + const u_int8_t * name; + + if (descp == NULL) + return; + + if ((descp->cd_flags & CD_HASBUF) && + (descp->cd_nameptr != NULL)) { + name = descp->cd_nameptr; + descp->cd_nameptr = NULL; + descp->cd_namelen = 0; + vfs_removename((const char *)name); + } + descp->cd_nameptr = NULL; + descp->cd_namelen = 0; + descp->cd_flags &= ~CD_HASBUF; +} + +/* + * These Catalog functions allow access to the HFS Catalog (database). + * The catalog b-tree lock must be acquired before calling any of these routines. + */ + +/* + * cat_lookup - lookup a catalog node using a cnode descriptor + * + * Note: The caller is responsible for releasing the output + * catalog descriptor (when supplied outdescp is non-null). + */ +int +cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, int force_casesensitive_lookup, + struct cat_desc *outdescp, struct cat_attr *attrp, + struct cat_fork *forkp, cnid_t *desc_cnid) +{ + CatalogKey * keyp; + int std_hfs; + int result; + int flags; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + flags = force_casesensitive_lookup ? 
HFS_LOOKUP_CASESENSITIVE : 0; + + keyp = hfs_malloc(sizeof(CatalogKey)); + + result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)keyp, 1); + if (result) + goto exit; + + result = cat_lookupbykey(hfsmp, keyp, flags, descp->cd_hint, wantrsrc, outdescp, attrp, forkp, desc_cnid); + + if (result == ENOENT) { + if (!std_hfs) { + struct cat_desc temp_desc; + if (outdescp == NULL) { + bzero(&temp_desc, sizeof(temp_desc)); + outdescp = &temp_desc; + } + result = cat_lookupmangled(hfsmp, descp, wantrsrc, outdescp, attrp, forkp); + if (desc_cnid) { + *desc_cnid = outdescp->cd_cnid; + } + if (outdescp == &temp_desc) { + /* Release the local copy of desc */ + cat_releasedesc(outdescp); + } + } else if (hfsmp->hfs_encoding != kTextEncodingMacRoman) { + // make MacRoman key from utf-8 + // result = cat_lookupbykey(hfsmp, keyp, descp->cd_hint, attrp, forkp); + // update desc text encoding so that other catalog ops succeed + } + } +exit: + hfs_free(keyp, sizeof(*keyp)); + + return (result); +} + +int +cat_insertfilethread(struct hfsmount *hfsmp, struct cat_desc *descp) +{ + struct BTreeIterator *iterator; + struct FSBufferDescriptor file_data; + struct HFSCatalogFile file_rec; + u_int16_t datasize; + FCB *fcb; + int result; + + if (HFSTOVCB(hfsmp)->vcbSigWord != kHFSSigWord) + return (EINVAL); + + fcb = GetFileControlBlock(HFSTOVCB(hfsmp)->catalogRefNum); + + iterator = hfs_mallocz(2 * sizeof(*iterator)); + result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator[0].key, 0); + if (result) + goto exit; + + BDINIT(file_data, &file_rec); + result = BTSearchRecord(fcb, &iterator[0], &file_data, &datasize, &iterator[0]); + if (result) + goto exit; + + if (file_rec.recordType != kHFSFileRecord) { + result = EISDIR; + goto exit; + } + + if ((file_rec.flags & kHFSThreadExistsMask) == 0) { + struct FSBufferDescriptor thread_data; + struct HFSCatalogThread thread_rec; + + file_rec.flags |= kHFSThreadExistsMask; + BDINIT(thread_data, &thread_rec); + thread_data.itemSize = buildthread(&iterator[0].key, &thread_rec, 1, 0); + buildthreadkey(file_rec.fileID, 1, (CatalogKey *)&iterator[1].key); + + result = BTInsertRecord(fcb, &iterator[1], &thread_data, thread_data.itemSize); + if (result) + goto exit; + + (void) BTReplaceRecord(fcb, &iterator[0], &file_data, datasize); + (void) BTFlushPath(fcb); + } +exit: + (void) BTFlushPath(fcb); + hfs_free(iterator, 2 * sizeof(*iterator)); + + return MacToVFSError(result); +} + + +/* + * cat_findname - obtain a descriptor from cnid + * + * Only a thread lookup is performed. + * + * Note: The caller is responsible for releasing the output + * catalog descriptor (when supplied outdescp is non-null). + + */ +int +cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp) +{ + struct BTreeIterator * iterator; + FSBufferDescriptor btdata; + CatalogKey * keyp; + CatalogRecord * recp; + int isdir; + int result; + int std_hfs; + + isdir = 0; +#if CONFIG_HFS_STD + std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); +#else + std_hfs = 0; +#endif + + iterator = hfs_malloc(sizeof(*iterator)); + buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); + iterator->hint.nodeNum = 0; + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + result = BTSearchRecord(VTOF(hfsmp->hfs_catalog_vp), iterator, &btdata, NULL, NULL); + if (result) + goto exit; + + /* Turn thread record into a cnode key (in place). 
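
Both cat_findname() and cat_idlookup() rely on the same trick that the switch statement below performs in place: an HFS+ thread record already carries the parent directory ID and the object's name, which is exactly the catalog key of the file or folder record, so the key length is kHFSPlusCatalogKeyMinimumLength (6 bytes: the 4-byte parent ID plus the 2-byte name-length field) plus two bytes per name character. A non-in-place restatement as an editorial sketch (the helper name is invented; types come from the hfs headers this file already includes):

/* Build the catalog key that a thread record points back to. */
static void
thread_record_to_key(const HFSPlusCatalogThread *thread, HFSPlusCatalogKey *key)
{
    key->parentID  = thread->parentID;
    key->nodeName  = thread->nodeName;     /* struct copy of the Unicode name */
    key->keyLength = kHFSPlusCatalogKeyMinimumLength +
                     (thread->nodeName.length * sizeof(u_int16_t));
}
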
*/ + switch (recp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSFileThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + break; +#endif + + case kHFSPlusFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSPlusFileThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + break; + default: + result = ENOENT; + goto exit; + } + +#if CONFIG_HFS_STD + if (std_hfs) { + HFSPlusCatalogKey * pluskey = NULL; + u_int32_t encoding; + + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, &keyp->hfs, pluskey, &encoding); + builddesc(pluskey, cnid, 0, encoding, isdir, outdescp); + hfs_free(pluskey, sizeof(*pluskey)); + } else +#endif + { + builddesc((HFSPlusCatalogKey *)keyp, cnid, 0, 0, isdir, outdescp); + } + +exit: + hfs_free(recp, sizeof(*recp)); + hfs_free(iterator, sizeof(*iterator)); + + return MacToVFSError(result); +} + +/* + * cat_idlookup - lookup a catalog node using a cnode id + * + * Note: The caller is responsible for releasing the output + * catalog descriptor (when supplied outdescp is non-null). + */ +int +cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files, int wantrsrc, + struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp) +{ + struct BTreeIterator * iterator; + FSBufferDescriptor btdata; + u_int16_t datasize; + CatalogKey * keyp; + CatalogRecord * recp; + int result; + int std_hfs; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + + iterator = hfs_mallocz(sizeof(*iterator)); + buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + result = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator, + &btdata, &datasize, iterator); + if (result) + goto exit; + + /* Turn thread record into a cnode key (in place) */ + switch (recp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + + /* check for NULL name */ + if (keyp->hfs.nodeName[0] == 0) { + result = ENOENT; + goto exit; + } + + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + break; +#endif + + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + + /* check for NULL name */ + if (keyp->hfsPlus.nodeName.length == 0) { + result = ENOENT; + goto exit; + } + + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + break; + + default: + result = ENOENT; + goto exit; + } + + result = cat_lookupbykey(hfsmp, keyp, + ((allow_system_files != 0) ? HFS_LOOKUP_SYSFILE : 0), + 0, wantrsrc, outdescp, attrp, forkp, NULL); + /* No corresponding file/folder record found for a thread record, + * mark the volume inconsistent. + */ + if (result == 0 && outdescp) { + cnid_t dcnid = outdescp->cd_cnid; + /* + * Just for sanity's case, let's make sure that + * the key in the thread matches the key in the record. 
+ */ + if (cnid != dcnid) { + printf("hfs: cat_idlookup: Requested cnid (%d / %08x) != dcnid (%d / %08x)\n", cnid, cnid, dcnid, dcnid); + result = ENOENT; + } + } +exit: + hfs_free(recp, sizeof(*recp)); + hfs_free(iterator, sizeof(*iterator)); + + return MacToVFSError(result); +} + + +/* + * cat_lookupmangled - lookup a catalog node using a mangled name + */ +int +cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, + struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp) +{ + cnid_t fileID; + u_int32_t prefixlen; + int result; + u_int8_t utf8[NAME_MAX + 1]; + u_int32_t utf8len; + u_int16_t unicode[kHFSPlusMaxFileNameChars + 1]; + size_t unicodelen; + + if (wantrsrc) + return (ENOENT); + + fileID = GetEmbeddedFileID(descp->cd_nameptr, descp->cd_namelen, &prefixlen); + if (fileID < (cnid_t)kHFSFirstUserCatalogNodeID) + return (ENOENT); + + if (fileID == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + fileID == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid || + fileID == hfsmp->hfs_jnlfileid || + fileID == hfsmp->hfs_jnlinfoblkid) { + return (ENOENT); + } + + result = cat_idlookup(hfsmp, fileID, 0, 0, outdescp, attrp, forkp); + if (result) + return (ENOENT); + /* It must be in the correct directory */ + if (descp->cd_parentcnid != outdescp->cd_parentcnid) + goto falsematch; + + /* + * Compare the mangled version of file name looked up from the + * disk with the mangled name provided by the user. Note that + * this comparison is case-sensitive, which should be fine + * since we're trying to prevent user space from constructing + * a mangled name that differs from the one they'd get from the + * file system. + */ + result = utf8_decodestr(outdescp->cd_nameptr, outdescp->cd_namelen, + unicode, &unicodelen, sizeof(unicode), ':', 0); + if (result) { + goto falsematch; + } + result = ConvertUnicodeToUTF8Mangled(unicodelen, unicode, + sizeof(utf8), &utf8len, utf8, fileID); + if ((result != 0) || + ((u_int16_t)descp->cd_namelen != utf8len) || + (bcmp(descp->cd_nameptr, utf8, utf8len) != 0)) { + goto falsematch; + } + + return (0); + +falsematch: + cat_releasedesc(outdescp); + return (ENOENT); +} + + +/* + * cat_lookupbykey - lookup a catalog node using a cnode key + */ +static int +cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t hint, int wantrsrc, + struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid) +{ + struct BTreeIterator * iterator; + FSBufferDescriptor btdata; + CatalogRecord * recp; + u_int16_t datasize; + int result; + int std_hfs; + u_int32_t ilink = 0; + cnid_t cnid = 0; + u_int32_t encoding = 0; + cnid_t parentid = 0; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + iterator = hfs_mallocz(sizeof(*iterator)); + iterator->hint.nodeNum = hint; + bcopy(keyp, &iterator->key, sizeof(CatalogKey)); + + result = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator, + &btdata, &datasize, iterator); + if (result) + goto exit; + + /* Save the cnid, parentid, and encoding now in case there's a hard link or inode */ + cnid = getcnid(recp); + if (cnid == 0) { + /* CNID of 0 is invalid. 
Mark as corrupt */ + hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); + result = EINVAL; + goto exit; + } + + if (std_hfs == 0) { + parentid = keyp->hfsPlus.parentID; + } + + encoding = getencoding(recp); + hint = iterator->hint.nodeNum; + + /* Hide the journal files (if any) */ + if ((hfsmp->jnl || ((HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) && + ((cnid == hfsmp->hfs_jnlfileid) || (cnid == hfsmp->hfs_jnlinfoblkid)) && + !(flags & HFS_LOOKUP_SYSFILE)) { + result = HFS_ERESERVEDNAME; + goto exit; + } + + if (!std_hfs && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* Make sure the case of the file was correct if requested */ + if (flags & HFS_LOOKUP_CASESENSITIVE) { + if (0 != cat_binarykeycompare(&keyp->hfsPlus, (HFSPlusCatalogKey *)&iterator->key)) { + result = HFS_ERESERVEDNAME; + goto exit; + } + } + } + + /* + * When a hardlink link is encountered, auto resolve it. + * + * The catalog record will change, and possibly its type. + */ + if (!std_hfs + && (attrp || forkp) + && (recp->recordType == kHFSPlusFileRecord) + && ((to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->hfs_itime) || + (to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->hfs_metadata_createdate))) { + int isdirlink = 0; + int isfilelink = 0; + + if ((SWAP_BE32(recp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(recp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator)) { + isfilelink = 1; + } else if ((recp->hfsPlusFile.flags & kHFSHasLinkChainMask) && + (SWAP_BE32(recp->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(recp->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator)) { + isdirlink = 1; + } + if ((isfilelink || isdirlink) && !(flags & HFS_LOOKUP_HARDLINK)) { + ilink = recp->hfsPlusFile.hl_linkReference; + (void) cat_resolvelink(hfsmp, ilink, isdirlink, (struct HFSPlusCatalogFile *)recp); + } + } + + if (attrp != NULL) { + if (std_hfs == 0) { + getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp); + if (ilink) { + /* Update the inode number for this hard link */ + attrp->ca_linkref = ilink; + } + + /* + * Set kHFSHasLinkChainBit for hard links, and reset it for all + * other items. Also set linkCount to 1 for regular files. + * + * Due to some bug (rdar://8505977), some regular files can have + * kHFSHasLinkChainBit set and linkCount more than 1 even if they + * are not really hard links. The runtime code should not consider + * these files has hard links. Therefore we reset the kHFSHasLinkChainBit + * and linkCount for regular file before we vend it out. This might + * also result in repairing the bad files on disk, if the corresponding + * file is modified and updated on disk. + */ + if (ilink) { + /* This is a hard link and the link count bit was not set */ + if (!(attrp->ca_recflags & kHFSHasLinkChainMask)) { + printf ("hfs: set hardlink bit on vol=%s cnid=%u inoid=%u\n", hfsmp->vcbVN, cnid, ilink); + attrp->ca_recflags |= kHFSHasLinkChainMask; + } + } else { + /* Make sure that this non-hard link (regular) record is not + * an inode record that was looked up and we do not end up + * reseting the hard link bit on it. 
+ */ + if ((parentid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (parentid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid)) { + /* This is not a hard link or inode and the link count bit was set */ + if (attrp->ca_recflags & kHFSHasLinkChainMask) { + printf ("hfs: clear hardlink bit on vol=%s cnid=%u\n", hfsmp->vcbVN, cnid); + attrp->ca_recflags &= ~kHFSHasLinkChainMask; + } + /* This is a regular file and the link count was more than 1 */ + if (S_ISREG(attrp->ca_mode) && (attrp->ca_linkcount > 1)) { + printf ("hfs: set linkcount=1 on vol=%s cnid=%u old=%u\n", hfsmp->vcbVN, cnid, attrp->ca_linkcount); + attrp->ca_linkcount = 1; + } + } + } + } +#if CONFIG_HFS_STD + else { + struct HFSPlusCatalogFile cnoderec; + + promoteattr(hfsmp, recp, &cnoderec); + getbsdattr(hfsmp, &cnoderec, attrp); + } +#endif + } + if (forkp != NULL) { + if (isadir(recp)) { + bzero(forkp, sizeof(*forkp)); + } +#if CONFIG_HFS_STD + else if (std_hfs) { + promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, wantrsrc, forkp); + } +#endif + else if (wantrsrc) { + /* Convert the resource fork. */ + forkp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize; + forkp->cf_new_size = 0; + forkp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks; + if ((hfsmp->hfc_stage == HFC_RECORDING) && + (to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) { + forkp->cf_bytesread = + recp->hfsPlusFile.resourceFork.clumpSize * + HFSTOVCB(hfsmp)->blockSize; + } else { + forkp->cf_bytesread = 0; + } + forkp->cf_vblocks = 0; + bcopy(&recp->hfsPlusFile.resourceFork.extents[0], + &forkp->cf_extents[0], sizeof(HFSPlusExtentRecord)); + } else { + int i; + u_int32_t validblks; + + /* Convert the data fork. */ + forkp->cf_size = recp->hfsPlusFile.dataFork.logicalSize; + forkp->cf_new_size = 0; + forkp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks; + if ((hfsmp->hfc_stage == HFC_RECORDING) && + (to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) { + forkp->cf_bytesread = + recp->hfsPlusFile.dataFork.clumpSize * + HFSTOVCB(hfsmp)->blockSize; + } else { + forkp->cf_bytesread = 0; + } + forkp->cf_vblocks = 0; + bcopy(&recp->hfsPlusFile.dataFork.extents[0], + &forkp->cf_extents[0], sizeof(HFSPlusExtentRecord)); + + /* Validate the fork's resident extents. */ + validblks = 0; + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (forkp->cf_extents[i].startBlock + forkp->cf_extents[i].blockCount >= hfsmp->totalBlocks) { + /* Suppress any bad extents so a remove can succeed. */ + forkp->cf_extents[i].startBlock = 0; + forkp->cf_extents[i].blockCount = 0; + /* Disable writes */ + if (attrp != NULL) { + attrp->ca_mode &= S_IFMT | S_IRUSR | S_IRGRP | S_IROTH; + } + } else { + validblks += forkp->cf_extents[i].blockCount; + } + } + /* Adjust for any missing blocks. */ + if ((validblks < forkp->cf_blocks) && (forkp->cf_extents[7].blockCount == 0)) { + off_t psize; + + /* + * This is technically a volume corruption. + * If the total number of blocks calculated by iterating + summing + * the extents in the resident extent records, is less than that + * which is reported in the catalog entry, we should force a fsck. + * Only modifying ca_blocks here is not guaranteed to make it out + * to disk; it is a runtime-only field. + * + * Note that we could have gotten into this state if we had invalid ranges + * that existed in borrowed blocks that somehow made it out to disk. + * The cnode's on disk block count should never be greater + * than that which is in its extent records. 
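
The validation above walks the eight extents stored directly in the catalog record, drops any extent that would run past the end of the volume, and, when no overflow extents can account for the difference, clamps the fork's block count and size to what is actually mapped. The core of that check, restated as a stand-alone helper (editorial sketch, not part of the patch; the name is invented and the types come from the hfs headers this file already includes):

/* Sum the blocks covered by well-formed resident extents. */
static u_int32_t
count_valid_resident_blocks(const HFSPlusExtentDescriptor extents[kHFSPlusExtentDensity],
                            u_int32_t total_volume_blocks)
{
    u_int32_t valid = 0;

    for (int i = 0; i < kHFSPlusExtentDensity; ++i) {
        if (extents[i].startBlock + extents[i].blockCount >= total_volume_blocks)
            continue;                    /* bad extent: treat it as unmapped */
        valid += extents[i].blockCount;
    }
    return valid;
}
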
+ */ + + (void) hfs_mark_inconsistent (hfsmp, HFS_INCONSISTENCY_DETECTED); + + forkp->cf_blocks = validblks; + if (attrp != NULL) { + attrp->ca_blocks = validblks + recp->hfsPlusFile.resourceFork.totalBlocks; + } + psize = (off_t)validblks * (off_t)hfsmp->blockSize; + if (psize < forkp->cf_size) { + forkp->cf_size = psize; + } + + } + } + } + if (descp != NULL) { + HFSPlusCatalogKey * pluskey = NULL; + + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&iterator->key; + } +#if CONFIG_HFS_STD + else { + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, (HFSCatalogKey *)&iterator->key, pluskey, &encoding); + + } +#endif + + builddesc(pluskey, cnid, hint, encoding, isadir(recp), descp); + +#if CONFIG_HFS_STD + if (std_hfs) { + hfs_free(pluskey, sizeof(*pluskey)); + } +#endif + + } + + if (desc_cnid != NULL) { + *desc_cnid = cnid; + } +exit: + hfs_free(iterator, sizeof(*iterator)); + hfs_free(recp, sizeof(*recp)); + + return MacToVFSError(result); +} + + +/* + * cat_create - create a node in the catalog + * using MacRoman encoding + * + * NOTE: both the catalog file and attribute file locks must + * be held before calling this function. + * + * The caller is responsible for releasing the output + * catalog descriptor (when supplied outdescp is non-null). + */ +int +cat_create(struct hfsmount *hfsmp, cnid_t new_fileid, struct cat_desc *descp, struct cat_attr *attrp, + struct cat_desc *out_descp) +{ + FCB * fcb; + struct btobj * bto; + FSBufferDescriptor btdata; + u_int32_t datalen; + int std_hfs; + int result = 0; + u_int32_t encoding = kTextEncodingMacRoman; + int modeformat; + + modeformat = attrp->ca_mode & S_IFMT; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); + + /* The caller is expected to reserve a CNID before calling this function! 
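+	 * (The new_fileid is normally reserved with cat_acquire_cnid() while the
+	 * catalog lock is held -- the same helper cat_createlink() calls below.)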
*/ + + /* Get space for iterator, key and data */ + bto = hfs_malloc(sizeof(struct btobj)); + bto->iterator.hint.nodeNum = 0; + + result = buildkey(hfsmp, descp, &bto->key, 0); + if (result) + goto exit; + + /* + * Insert the thread record first + */ + if (!std_hfs || (modeformat == S_IFDIR)) { + datalen = buildthread((void*)&bto->key, &bto->data, std_hfs, + S_ISDIR(attrp->ca_mode)); + btdata.bufferAddress = &bto->data; + btdata.itemSize = datalen; + btdata.itemCount = 1; + + /* Caller asserts the following: + * 1) this CNID is not in use by any orphaned EAs + * 2) There are no lingering cnodes (removed on-disk but still in-core) with this CNID + * 3) There are no thread or catalog records for this ID + */ + buildthreadkey(new_fileid, std_hfs, (CatalogKey *) &bto->iterator.key); + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { + goto exit; + } + } + + /* + * Now insert the file/directory record + */ + buildrecord(attrp, new_fileid, std_hfs, encoding, &bto->data, &datalen); + btdata.bufferAddress = &bto->data; + btdata.itemSize = datalen; + btdata.itemCount = 1; + + bcopy(&bto->key, &bto->iterator.key, sizeof(bto->key)); + + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { + if (result == btExists) + result = EEXIST; + + /* Back out the thread record */ + if (!std_hfs || S_ISDIR(attrp->ca_mode)) { + buildthreadkey(new_fileid, std_hfs, (CatalogKey *)&bto->iterator.key); + if (BTDeleteRecord(fcb, &bto->iterator)) { + /* Error on deleting extra thread record, mark + * volume inconsistent + */ + printf ("hfs: cat_create() failed to delete thread record id=%u on vol=%s\n", new_fileid, hfsmp->vcbVN); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } + goto exit; + } + + /* + * Insert was successful, update name, parent and volume + */ + if (out_descp != NULL) { + HFSPlusCatalogKey * pluskey = NULL; + + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&bto->iterator.key; + } +#if CONFIG_HFS_STD + else { + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, (HFSCatalogKey *)&bto->iterator.key, pluskey, &encoding); + } +#endif + + builddesc(pluskey, new_fileid, bto->iterator.hint.nodeNum, + encoding, S_ISDIR(attrp->ca_mode), out_descp); +#if CONFIG_HFS_STD + if (std_hfs) { + hfs_free(pluskey, sizeof(*pluskey)); + } +#endif + + } + attrp->ca_fileid = new_fileid; + +exit: + (void) BTFlushPath(fcb); + hfs_free(bto, sizeof(*bto)); + + return MacToVFSError(result); +} + + +/* + * cnode_rename - rename a catalog node + * + * Assumes that the target's directory exists. + * + * Order of B-tree operations: + * 1. BTSearchRecord(from_cnode, &data); + * 2. BTInsertRecord(to_cnode, &data); + * 3. BTDeleteRecord(from_cnode); + * 4. BTDeleteRecord(from_thread); + * 5. BTInsertRecord(to_thread); + * + * Note: The caller is responsible for releasing the output + * catalog descriptor (when supplied out_cdp is non-null). 
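 * If an undo of a partially completed rename cannot be applied, the
 * volume is marked inconsistent (HFS_ROLLBACK_FAILED) so a subsequent
 * fsck can repair it.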
+ */ +int +cat_rename ( + struct hfsmount * hfsmp, + struct cat_desc * from_cdp, + struct cat_desc * todir_cdp, + struct cat_desc * to_cdp, + struct cat_desc * out_cdp ) +{ + struct BTreeIterator * to_iterator = NULL; + struct BTreeIterator * from_iterator = NULL; + FSBufferDescriptor btdata; + CatalogRecord * recp = NULL; + HFSPlusCatalogKey * to_key; + ExtendedVCB * vcb; + FCB * fcb; + u_int16_t datasize; + int result = 0; + int sourcegone = 0; + int skipthread = 0; + int directory = from_cdp->cd_flags & CD_ISDIR; + int is_dirlink = 0; + int std_hfs; + u_int32_t encoding = 0; + + vcb = HFSTOVCB(hfsmp); + fcb = GetFileControlBlock(vcb->catalogRefNum); + std_hfs = (vcb->vcbSigWord == kHFSSigWord); + + if (from_cdp->cd_namelen == 0 || to_cdp->cd_namelen == 0) + return (EINVAL); + + from_iterator = hfs_mallocz(sizeof(*from_iterator)); + if ((result = buildkey(hfsmp, from_cdp, (HFSPlusCatalogKey *)&from_iterator->key, 0))) + goto exit; + + to_iterator = hfs_mallocz(sizeof(*to_iterator)); + if ((result = buildkey(hfsmp, to_cdp, (HFSPlusCatalogKey *)&to_iterator->key, 0))) + goto exit; + + to_key = (HFSPlusCatalogKey *)&to_iterator->key; + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + /* + * When moving a directory, make sure its a valid move. + */ + if (directory && (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid)) { + struct BTreeIterator *dir_iterator = NULL; + + cnid_t cnid = from_cdp->cd_cnid; + cnid_t pathcnid = todir_cdp->cd_parentcnid; + + /* First check the obvious ones */ + if (cnid == fsRtDirID || + cnid == to_cdp->cd_parentcnid || + cnid == pathcnid) { + result = EINVAL; + goto exit; + } + /* now allocate the dir_iterator */ + dir_iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* + * Traverse destination path all the way back to the root + * making sure that source directory is not encountered. + * + */ + while (pathcnid > fsRtDirID) { + buildthreadkey(pathcnid, std_hfs, (CatalogKey *)&dir_iterator->key); + result = BTSearchRecord(fcb, dir_iterator, &btdata, &datasize, NULL); + if (result) { + hfs_free(dir_iterator, sizeof(*dir_iterator)); + goto exit; + } + pathcnid = getparentcnid(recp); + if (pathcnid == cnid || pathcnid == 0) { + result = EINVAL; + hfs_free(dir_iterator, sizeof(*dir_iterator)); + goto exit; + } + } + hfs_free(dir_iterator, sizeof(*dir_iterator)); + } + + /* + * Step 1: Find cnode data at old location + */ + result = BTSearchRecord(fcb, from_iterator, &btdata, + &datasize, from_iterator); + if (result) { + if (std_hfs || (result != btNotFound)) + goto exit; + + struct cat_desc temp_desc; + + /* Probably the node has mangled name */ + result = cat_lookupmangled(hfsmp, from_cdp, 0, &temp_desc, NULL, NULL); + if (result) + goto exit; + + /* The file has mangled name. Search the cnode data using full name */ + bzero(from_iterator, sizeof(*from_iterator)); + result = buildkey(hfsmp, &temp_desc, (HFSPlusCatalogKey *)&from_iterator->key, 0); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + result = BTSearchRecord(fcb, from_iterator, &btdata, &datasize, from_iterator); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + cat_releasedesc(&temp_desc); + } + + /* Check if the source is directory hard link. 
We do not change + * directory flag because it is later used to initialize result descp + */ + if ((!std_hfs) && + (directory) && + (recp->recordType == kHFSPlusFileRecord) && + (recp->hfsPlusFile.flags & kHFSHasLinkChainMask)) { + is_dirlink = 1; + } + + /* + * Update the text encoding (on disk and in descriptor), + * using hfs_pickencoding to get the new encoding when available. + * + * Note that hardlink inodes don't require a text encoding hint. + */ + if (!std_hfs && + todir_cdp->cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid && + todir_cdp->cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + +#if TARGET_OS_OSX + encoding = hfs_pickencoding(to_key->nodeName.unicode, to_key->nodeName.length); +#else // !TARGET_OS_OSX + encoding = kTextEncodingMacRoman; +#endif // TARGET_OS_OSX + + hfs_setencodingbits(hfsmp, encoding); + recp->hfsPlusFile.textEncoding = encoding; + if (out_cdp) + out_cdp->cd_encoding = encoding; + } + +#if CONFIG_HFS_STD + if (std_hfs && !directory && + !(recp->hfsFile.flags & kHFSThreadExistsMask)) { + skipthread = 1; + } +#endif + +#if 0 + /* + * If the keys are identical then there's nothing left to do! + * + * update the hint and exit + * + */ + if (std_hfs && hfskeycompare(to_key, iter->key) == 0) + goto exit; + if (!std_hfs && hfspluskeycompare(to_key, iter->key) == 0) + goto exit; +#endif + + /* Step 2: Insert cnode at new location */ + result = BTInsertRecord(fcb, to_iterator, &btdata, datasize); + if (result == btExists) { + int fromtype = recp->recordType; + cnid_t cnid = 0; + + if (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid) + goto exit; /* EEXIST */ + + /* Find cnode data at new location */ + result = BTSearchRecord(fcb, to_iterator, &btdata, &datasize, NULL); + if (result) + goto exit; + + /* Get the CNID after calling searchrecord */ + cnid = getcnid (recp); + if (cnid == 0) { + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + result = EINVAL; + goto exit; + } + + if ((fromtype != recp->recordType) || + (from_cdp->cd_cnid != cnid)) { + result = EEXIST; + goto exit; /* EEXIST */ + } + /* The old name is a case variant and must be removed */ + result = BTDeleteRecord(fcb, from_iterator); + if (result) + goto exit; + + /* Insert cnode (now that case duplicate is gone) */ + result = BTInsertRecord(fcb, to_iterator, &btdata, datasize); + if (result) { + /* Try and restore original before leaving */ + // XXXdbg + #if 1 + { + int err; + err = BTInsertRecord(fcb, from_iterator, &btdata, datasize); + if (err) { + printf("hfs: cat_create: could not undo (BTInsert = %d)\n", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + result = err; + goto exit; + } + } + #else + (void) BTInsertRecord(fcb, from_iterator, &btdata, datasize); + #endif + goto exit; + } + sourcegone = 1; + } + if (result) + goto exit; + + /* Step 3: Remove cnode from old location */ + if (!sourcegone) { + result = BTDeleteRecord(fcb, from_iterator); + if (result) { + /* Try and delete new record before leaving */ + // XXXdbg + #if 1 + { + int err; + err = BTDeleteRecord(fcb, to_iterator); + if (err) { + printf("hfs: cat_create: could not undo (BTDelete = %d)\n", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + result = err; + goto exit; + } + } + #else + (void) BTDeleteRecord(fcb, to_iterator); + #endif + goto exit; + } + } + + /* #### POINT OF NO RETURN #### */ + + /* + * Step 4: Remove cnode's old thread record + */ + buildthreadkey(from_cdp->cd_cnid, std_hfs, (CatalogKey *)&from_iterator->key); + (void) BTDeleteRecord(fcb, 
from_iterator); + + /* + * Step 5: Insert cnode's new thread record + * (optional for HFS files) + */ + if (!skipthread) { + /* For directory hard links, always create a file thread + * record. For everything else, use the directory flag. + */ + if (is_dirlink) { + datasize = buildthread(&to_iterator->key, recp, std_hfs, false); + } else { + datasize = buildthread(&to_iterator->key, recp, std_hfs, directory); + } + btdata.itemSize = datasize; + buildthreadkey(from_cdp->cd_cnid, std_hfs, (CatalogKey *)&from_iterator->key); + result = BTInsertRecord(fcb, from_iterator, &btdata, datasize); + } + + if (out_cdp) { + HFSPlusCatalogKey * pluskey = NULL; + + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&to_iterator->key; + } +#if CONFIG_HFS_STD + else { + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, (HFSCatalogKey *)&to_iterator->key, pluskey, &encoding); + + /* Save the real encoding hint in the Finder Info (field 4). */ + if (directory && from_cdp->cd_cnid == kHFSRootFolderID) { + u_int32_t realhint; + + realhint = hfs_pickencoding(pluskey->nodeName.unicode, pluskey->nodeName.length); + vcb->vcbFndrInfo[4] = SET_HFS_TEXT_ENCODING(realhint); + } + } +#endif + + builddesc(pluskey, from_cdp->cd_cnid, to_iterator->hint.nodeNum, + encoding, directory, out_cdp); +#if CONFIG_HFS_STD + if (std_hfs) { + hfs_free(pluskey, sizeof(*pluskey)); + } +#endif + + } +exit: + (void) BTFlushPath(fcb); + if (from_iterator) + hfs_free(from_iterator, sizeof(*from_iterator)); + if (to_iterator) + hfs_free(to_iterator, sizeof(*to_iterator)); + if (recp) + hfs_free(recp, sizeof(*recp)); + return MacToVFSError(result); +} + + +/* + * cat_delete - delete a node from the catalog + * + * Order of B-tree operations: + * 1. BTDeleteRecord(cnode); + * 2. BTDeleteRecord(thread); + * 3. BTUpdateRecord(parent); + */ +int +cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp) +{ + FCB * fcb; + BTreeIterator *iterator; + cnid_t cnid; + int std_hfs; + int result; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); + + /* Preflight check: + * + * The root directory cannot be deleted + * A directory must be empty + * A file must be zero length (no blocks) + */ + if (descp->cd_cnid < kHFSFirstUserCatalogNodeID || + descp->cd_parentcnid == kHFSRootParentID) + return (EINVAL); + + /* XXX Preflight Missing */ + + /* Borrow the btcb iterator since we have an exclusive catalog lock. */ + iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator; + iterator->hint.nodeNum = 0; + + /* + * Derive a key from either the file ID (for a virtual inode) + * or the descriptor. + */ + if (descp->cd_namelen == 0) { + result = getkey(hfsmp, attrp->ca_fileid, (CatalogKey *)&iterator->key); + cnid = attrp->ca_fileid; + } else { + result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0); + cnid = descp->cd_cnid; + } + if (result) + goto exit; + + /* Delete record */ + result = BTDeleteRecord(fcb, iterator); + if (result) { + if (std_hfs || (result != btNotFound)) + goto exit; + + struct cat_desc temp_desc; + + /* Probably the node has mangled name */ + result = cat_lookupmangled(hfsmp, descp, 0, &temp_desc, attrp, NULL); + if (result) + goto exit; + + /* The file has mangled name. 
Delete the file using full name */ + bzero(iterator, sizeof(*iterator)); + result = buildkey(hfsmp, &temp_desc, (HFSPlusCatalogKey *)&iterator->key, 0); + cnid = temp_desc.cd_cnid; + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + result = BTDeleteRecord(fcb, iterator); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + cat_releasedesc(&temp_desc); + } + + /* Delete thread record. On error, mark volume inconsistent */ + buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); + if (BTDeleteRecord(fcb, iterator)) { + if (!std_hfs) { + printf ("hfs: cat_delete() failed to delete thread record id=%u on vol=%s\n", cnid, hfsmp->vcbVN); + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + } + +exit: + (void) BTFlushPath(fcb); + + return MacToVFSError(result); +} + + +/* + * cat_update_internal - update the catalog node described by descp + * using the data from attrp and forkp. + * If update_hardlink is true, the hard link catalog record is updated + * and not the inode catalog record. + */ +static int +cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp, + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp) +{ + FCB * fcb; + BTreeIterator * iterator; + struct update_state state; + int result; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + state.s_desc = descp; + state.s_attr = attrp; + state.s_datafork = dataforkp; + state.s_rsrcfork = rsrcforkp; + state.s_hfsmp = hfsmp; + + /* Borrow the btcb iterator since we have an exclusive catalog lock. */ + iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator; + + /* + * For open-deleted files we need to do a lookup by cnid + * (using thread rec). + * + * For hard links and if not requested by caller, the target + * of the update is the inode itself (not the link record) + * so a lookup by fileid (i.e. thread rec) is needed. + */ + if ((update_hardlink == false) && + ((descp->cd_cnid != attrp->ca_fileid) || + (descp->cd_namelen == 0) || + (attrp->ca_recflags & kHFSHasLinkChainMask))) { + result = getkey(hfsmp, attrp->ca_fileid, (CatalogKey *)&iterator->key); + } else { + result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0); + } + if (result) + goto exit; + + /* Pass a node hint */ + iterator->hint.nodeNum = descp->cd_hint; + + result = BTUpdateRecord(fcb, iterator, + (IterateCallBackProcPtr)catrec_update, &state); + if (result) + goto exit; + + /* Update the node hint. */ + descp->cd_hint = iterator->hint.nodeNum; + +exit: + (void) BTFlushPath(fcb); + + return MacToVFSError(result); +} + +/* + * cat_update - update the catalog node described by descp + * using the data from attrp and forkp. + */ +int +cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp, + const struct cat_fork *dataforkp, const struct cat_fork *rsrcforkp) +{ + return cat_update_internal(hfsmp, false, descp, attrp, dataforkp, rsrcforkp); +} + +/* + * catrec_update - Update the fields of a catalog record + * This is called from within BTUpdateRecord. 
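 * The record is updated in place; a non-zero return from this callback
 * (the sanity checks below return btNotFound) is propagated back to the
 * caller of BTUpdateRecord.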
+ */ +static int +catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *state) +{ + struct cat_desc *descp; + struct cat_attr *attrp; + const struct cat_fork *forkp; + struct hfsmount *hfsmp; + long blksize; + + descp = state->s_desc; + attrp = state->s_attr; + hfsmp = state->s_hfsmp; + blksize = HFSTOVCB(hfsmp)->blockSize; + + switch (crp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFolderRecord: { + HFSCatalogFolder *dir; + + dir = (struct HFSCatalogFolder *)crp; + /* Do a quick sanity check */ + if ((ckp->hfs.parentID != descp->cd_parentcnid) || + (dir->folderID != descp->cd_cnid)) + return (btNotFound); + dir->valence = attrp->ca_entries; + dir->createDate = UTCToLocal(to_hfs_time(attrp->ca_itime)); + dir->modifyDate = UTCToLocal(to_hfs_time(attrp->ca_mtime)); + dir->backupDate = UTCToLocal(to_hfs_time(attrp->ca_btime)); + bcopy(&attrp->ca_finderinfo[0], &dir->userInfo, 16); + bcopy(&attrp->ca_finderinfo[16], &dir->finderInfo, 16); + break; + } + case kHFSFileRecord: { + HFSCatalogFile *file; + int i; + + file = (struct HFSCatalogFile *)crp; + /* Do a quick sanity check */ + if ((ckp->hfs.parentID != descp->cd_parentcnid) || + (file->fileID != attrp->ca_fileid)) + return (btNotFound); + file->createDate = UTCToLocal(to_hfs_time(attrp->ca_itime)); + file->modifyDate = UTCToLocal(to_hfs_time(attrp->ca_mtime)); + file->backupDate = UTCToLocal(to_hfs_time(attrp->ca_btime)); + bcopy(&attrp->ca_finderinfo[0], &file->userInfo, 16); + bcopy(&attrp->ca_finderinfo[16], &file->finderInfo, 16); + if (state->s_rsrcfork) { + forkp = state->s_rsrcfork; + file->rsrcLogicalSize = forkp->cf_size; + file->rsrcPhysicalSize = forkp->cf_blocks * blksize; + for (i = 0; i < kHFSExtentDensity; ++i) { + file->rsrcExtents[i].startBlock = + (u_int16_t)forkp->cf_extents[i].startBlock; + file->rsrcExtents[i].blockCount = + (u_int16_t)forkp->cf_extents[i].blockCount; + } + } + if (state->s_datafork) { + forkp = state->s_datafork; + file->dataLogicalSize = forkp->cf_size; + file->dataPhysicalSize = forkp->cf_blocks * blksize; + for (i = 0; i < kHFSExtentDensity; ++i) { + file->dataExtents[i].startBlock = + (u_int16_t)forkp->cf_extents[i].startBlock; + file->dataExtents[i].blockCount = + (u_int16_t)forkp->cf_extents[i].blockCount; + } + } + + /* Synchronize the lock state */ + if (attrp->ca_flags & (SF_IMMUTABLE | UF_IMMUTABLE)) + file->flags |= kHFSFileLockedMask; + else + file->flags &= ~kHFSFileLockedMask; + break; + } +#endif + + case kHFSPlusFolderRecord: { + HFSPlusCatalogFolder *dir; + + dir = (struct HFSPlusCatalogFolder *)crp; + /* Do a quick sanity check */ + if (dir->folderID != attrp->ca_fileid) { + printf("hfs: catrec_update: id %d != %d, vol=%s\n", dir->folderID, attrp->ca_fileid, hfsmp->vcbVN); + return (btNotFound); + } + dir->flags = attrp->ca_recflags; + dir->valence = attrp->ca_entries; + dir->createDate = to_hfs_time(attrp->ca_itime); + dir->contentModDate = to_hfs_time(attrp->ca_mtime); + dir->backupDate = to_hfs_time(attrp->ca_btime); + dir->accessDate = to_hfs_time(attrp->ca_atime); + attrp->ca_atimeondisk = attrp->ca_atime; + dir->attributeModDate = to_hfs_time(attrp->ca_ctime); + /* Note: directory hardlink inodes don't require a text encoding hint. 
*/ + if (ckp->hfsPlus.parentID != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + dir->textEncoding = descp->cd_encoding; + } + dir->folderCount = attrp->ca_dircount; + bcopy(&attrp->ca_finderinfo[0], &dir->userInfo, 32); + /* + * Update the BSD Info if it was already initialized on + * disk or if the runtime values have been modified. + * + * If the BSD info was already initialized, but + * MNT_UNKNOWNPERMISSIONS is set, then the runtime IDs are + * probably different than what was on disk. We don't want + * to overwrite the on-disk values (so if we turn off + * MNT_UNKNOWNPERMISSIONS, the old IDs get used again). + * This way, we can still change fields like the mode or + * dates even when MNT_UNKNOWNPERMISSIONS is set. + * + * Note that if MNT_UNKNOWNPERMISSIONS is set, hfs_chown + * won't change the uid or gid from their defaults. So, if + * the BSD info wasn't set, and the runtime values are not + * default, then what changed was the mode or flags. We + * have to set the uid and gid to something, so use the + * supplied values (which will be default), which has the + * same effect as creating a new file while + * MNT_UNKNOWNPERMISSIONS is set. + */ + if ((dir->bsdInfo.fileMode != 0) || + (attrp->ca_flags != 0) || + (attrp->ca_uid != hfsmp->hfs_uid) || + (attrp->ca_gid != hfsmp->hfs_gid) || + ((attrp->ca_mode & ALLPERMS) != + (hfsmp->hfs_dir_mask & ACCESSPERMS))) { + if ((dir->bsdInfo.fileMode == 0) || + (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) == 0) { + dir->bsdInfo.ownerID = attrp->ca_uid; + dir->bsdInfo.groupID = attrp->ca_gid; + } + dir->bsdInfo.ownerFlags = attrp->ca_flags & 0x000000FF; + dir->bsdInfo.adminFlags = attrp->ca_flags >> 16; + dir->bsdInfo.fileMode = attrp->ca_mode; + /* A directory hardlink has a link count. */ + if (attrp->ca_linkcount > 1 || dir->hl_linkCount > 1) { + dir->hl_linkCount = attrp->ca_linkcount; + } + } + break; + } + case kHFSPlusFileRecord: { + HFSPlusCatalogFile *file; + int is_dirlink; + + file = (struct HFSPlusCatalogFile *)crp; + /* Do a quick sanity check */ + if (file->fileID != attrp->ca_fileid) + return (btNotFound); + file->flags = attrp->ca_recflags; + file->createDate = to_hfs_time(attrp->ca_itime); + file->contentModDate = to_hfs_time(attrp->ca_mtime); + file->backupDate = to_hfs_time(attrp->ca_btime); + file->accessDate = to_hfs_time(attrp->ca_atime); + attrp->ca_atimeondisk = attrp->ca_atime; + file->attributeModDate = to_hfs_time(attrp->ca_ctime); + /* + * Note: file hardlink inodes don't require a text encoding + * hint, but they do have a first link value. + */ + if (ckp->hfsPlus.parentID == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + file->hl_firstLinkID = attrp->ca_firstlink; + } else { + file->textEncoding = descp->cd_encoding; + } + bcopy(&attrp->ca_finderinfo[0], &file->userInfo, 32); + /* + * Update the BSD Info if it was already initialized on + * disk or if the runtime values have been modified. + * + * If the BSD info was already initialized, but + * MNT_UNKNOWNPERMISSIONS is set, then the runtime IDs are + * probably different than what was on disk. We don't want + * to overwrite the on-disk values (so if we turn off + * MNT_UNKNOWNPERMISSIONS, the old IDs get used again). + * This way, we can still change fields like the mode or + * dates even when MNT_UNKNOWNPERMISSIONS is set. + * + * Note that if MNT_UNKNOWNPERMISSIONS is set, hfs_chown + * won't change the uid or gid from their defaults. 
So, if + * the BSD info wasn't set, and the runtime values are not + * default, then what changed was the mode or flags. We + * have to set the uid and gid to something, so use the + * supplied values (which will be default), which has the + * same effect as creating a new file while + * MNT_UNKNOWNPERMISSIONS is set. + * + * Do not modify bsdInfo for directory hard link records. + * They are set during creation and are not modifiable, so just + * leave them alone. + */ + is_dirlink = (file->flags & kHFSHasLinkChainMask) && + (SWAP_BE32(file->userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(file->userInfo.fdCreator) == kHFSAliasCreator); + + if (!is_dirlink && + ((file->bsdInfo.fileMode != 0) || + (attrp->ca_flags != 0) || + (attrp->ca_uid != hfsmp->hfs_uid) || + (attrp->ca_gid != hfsmp->hfs_gid) || + ((attrp->ca_mode & ALLPERMS) != + (hfsmp->hfs_file_mask & ACCESSPERMS)))) { + if ((file->bsdInfo.fileMode == 0) || + (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) == 0) { + file->bsdInfo.ownerID = attrp->ca_uid; + file->bsdInfo.groupID = attrp->ca_gid; + } + file->bsdInfo.ownerFlags = attrp->ca_flags & 0x000000FF; + file->bsdInfo.adminFlags = attrp->ca_flags >> 16; + file->bsdInfo.fileMode = attrp->ca_mode; + } + if (state->s_rsrcfork) { + forkp = state->s_rsrcfork; + file->resourceFork.logicalSize = forkp->cf_size; + file->resourceFork.totalBlocks = forkp->cf_blocks; + bcopy(&forkp->cf_extents[0], &file->resourceFork.extents, + sizeof(HFSPlusExtentRecord)); + /* Push blocks read to disk */ + file->resourceFork.clumpSize = + howmany(forkp->cf_bytesread, blksize); + } + if (state->s_datafork) { + forkp = state->s_datafork; + file->dataFork.logicalSize = forkp->cf_size; + file->dataFork.totalBlocks = forkp->cf_blocks; + bcopy(&forkp->cf_extents[0], &file->dataFork.extents, + sizeof(HFSPlusExtentRecord)); + /* Push blocks read to disk */ + file->dataFork.clumpSize = + howmany(forkp->cf_bytesread, blksize); + } + + if ((file->resourceFork.extents[0].startBlock != 0) && + (file->resourceFork.extents[0].startBlock == + file->dataFork.extents[0].startBlock)) { + panic("hfs: catrec_update: rsrc fork == data fork"); + } + + /* Synchronize the lock state */ + if (attrp->ca_flags & (SF_IMMUTABLE | UF_IMMUTABLE)) + file->flags |= kHFSFileLockedMask; + else + file->flags &= ~kHFSFileLockedMask; + + /* Push out special field if necessary */ + if (S_ISBLK(attrp->ca_mode) || S_ISCHR(attrp->ca_mode)) { + file->bsdInfo.special.rawDevice = attrp->ca_rdev; + } + else { + /* + * Protect against the degenerate case where the descriptor contains the + * raw inode ID in its CNID field. If the HFSPlusCatalogFile record indicates + * the linkcount was greater than 1 (the default value), then it must have become + * a hardlink. In this case, update the linkcount from the cat_attr passed in. + */ + if ((descp->cd_cnid != attrp->ca_fileid) || (attrp->ca_linkcount > 1 ) || + (file->hl_linkCount > 1)) { + file->hl_linkCount = attrp->ca_linkcount; + } + } + break; + } + default: + return (btNotFound); + } + return (0); +} + +/* This function sets kHFSHasChildLinkBit in a directory hierarchy in the + * catalog btree of given cnid by walking up the parent chain till it reaches + * either the root folder, or the private metadata directory for storing + * directory hard links. This function updates the corresponding in-core + * cnode, if any, and the directory record in the catalog btree. + * On success, returns zero. On failure, returns non-zero value. 
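 * (kHFSHasChildLinkBit marks a folder whose subtree contains a directory
 * hard link; walking up until the root folder, or the private
 * directory-inode folder, is reached tags every ancestor of the link.)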
+ */ +int +cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid) +{ + int retval = 0; + int lockflags = 0; + struct cat_desc desc; + struct cat_attr attr; + + while ((cnid != kHFSRootFolderID) && (cnid != kHFSRootParentID) && + (cnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid)) { + /* Update the bit in corresponding cnode, if any, in the hash. + * If the cnode has the bit already set, stop the traversal. + */ + retval = hfs_chash_set_childlinkbit(hfsmp, cnid); + if (retval == 0) { + break; + } + + /* Update the catalog record on disk if either cnode was not + * found in the hash, or if a cnode was found and the cnode + * did not have the bit set previously. + */ + retval = hfs_start_transaction(hfsmp); + if (retval) { + break; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + /* Look up our catalog folder record */ + retval = cat_idlookup(hfsmp, cnid, 0, 0, &desc, &attr, NULL); + if (retval) { + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + break; + } + + /* Update the bit in the catalog record */ + attr.ca_recflags |= kHFSHasChildLinkMask; + retval = cat_update(hfsmp, &desc, &attr, NULL, NULL); + if (retval) { + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + cat_releasedesc(&desc); + break; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + + cnid = desc.cd_parentcnid; + cat_releasedesc(&desc); + } + + return retval; +} + +/* This function traverses the parent directory hierarchy from the given + * directory to one level below root directory and checks if any of its + * ancestors is - + * 1. A directory hard link. + * 2. The 'pointed at' directory. + * If any of these conditions fail or an internal error is encountered + * during look up of the catalog record, this function returns non-zero value. 
+ */ +int +cat_check_link_ancestry(struct hfsmount *hfsmp, cnid_t cnid, cnid_t pointed_at_cnid) +{ + HFSPlusCatalogKey *keyp; + BTreeIterator *ip; + FSBufferDescriptor btdata; + HFSPlusCatalogFolder folder; + FCB *fcb; + int invalid; + int result; + + invalid = 0; + BDINIT(btdata, &folder); + ip = hfs_malloc(sizeof(*ip)); + keyp = (HFSPlusCatalogKey *)&ip->key; + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + while (cnid != kHFSRootParentID) { + /* Check if the 'pointed at' directory is an ancestor */ + if (pointed_at_cnid == cnid) { + invalid = 1; + break; + } + if ((result = getkey(hfsmp, cnid, (CatalogKey *)keyp))) { + printf("hfs: cat_check_link_ancestry: getkey failed id=%u, vol=%s\n", cnid, hfsmp->vcbVN); + invalid = 1; /* On errors, assume an invalid parent */ + break; + } + if ((result = BTSearchRecord(fcb, ip, &btdata, NULL, NULL))) { + printf("hfs: cat_check_link_ancestry: cannot find id=%u, vol=%s\n", cnid, hfsmp->vcbVN); + invalid = 1; /* On errors, assume an invalid parent */ + break; + } + /* Check if this ancestor is a directory hard link */ + if (folder.flags & kHFSHasLinkChainMask) { + invalid = 1; + break; + } + cnid = keyp->parentID; + } + hfs_free(ip, sizeof(*ip)); + return (invalid); +} + + +/* + * update_siblinglinks_callback - update a link's chain + */ + +struct linkupdate_state { + cnid_t filelinkid; + cnid_t prevlinkid; + cnid_t nextlinkid; +}; + +static int +update_siblinglinks_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct linkupdate_state *state) +{ + HFSPlusCatalogFile *file; + + if (crp->recordType != kHFSPlusFileRecord) { + printf("hfs: update_siblinglinks_callback: unexpected rec type %d\n", crp->recordType); + return (btNotFound); + } + + file = (struct HFSPlusCatalogFile *)crp; + if (file->flags & kHFSHasLinkChainMask) { + if (state->prevlinkid != HFS_IGNORABLE_LINK) { + file->hl_prevLinkID = state->prevlinkid; + } + if (state->nextlinkid != HFS_IGNORABLE_LINK) { + file->hl_nextLinkID = state->nextlinkid; + } + } else { + printf("hfs: update_siblinglinks_callback: file %d isn't a chain\n", file->fileID); + } + return (0); +} + +/* + * cat_update_siblinglinks - update a link's chain + */ +int +cat_update_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevlinkid, cnid_t nextlinkid) +{ + FCB * fcb; + BTreeIterator * iterator; + struct linkupdate_state state; + int result; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + state.filelinkid = linkfileid; + state.prevlinkid = prevlinkid; + state.nextlinkid = nextlinkid; + + /* Create an iterator for use by us temporarily */ + iterator = hfs_mallocz(sizeof(*iterator)); + + result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key); + if (result == 0) { + result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)update_siblinglinks_callback, &state); + (void) BTFlushPath(fcb); + } else { + printf("hfs: cat_update_siblinglinks: couldn't resolve cnid=%d, vol=%s\n", linkfileid, hfsmp->vcbVN); + } + + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + +/* + * cat_lookuplink - lookup a link by it's name + */ +int +cat_lookuplink(struct hfsmount *hfsmp, struct cat_desc *descp, cnid_t *linkfileid, cnid_t *prevlinkid, cnid_t *nextlinkid) +{ + FCB * fcb; + BTreeIterator * iterator; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile file; + int result; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* Create an iterator for use by us temporarily */ + iterator = hfs_mallocz(sizeof(*iterator)); + + if ((result = buildkey(hfsmp, descp, 
(HFSPlusCatalogKey *)&iterator->key, 0))) { + goto exit; + } + BDINIT(btdata, &file); + + if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { + goto exit; + } + if (file.recordType != kHFSPlusFileRecord) { + result = ENOENT; + goto exit; + } + *linkfileid = file.fileID; + + if (file.flags & kHFSHasLinkChainMask) { + *prevlinkid = file.hl_prevLinkID; + *nextlinkid = file.hl_nextLinkID; + } else { + *prevlinkid = 0; + *nextlinkid = 0; + } +exit: + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + + +/* + * cat_lookup_siblinglinks - lookup previous and next link ID for link using its cnid + */ +int +cat_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid, cnid_t *nextlinkid) +{ + FCB * fcb; + BTreeIterator * iterator; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile file; + int result; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* Create an iterator for use by us temporarily */ + iterator = hfs_mallocz(sizeof(*iterator)); + + if ((result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key))) { + goto exit; + } + BDINIT(btdata, &file); + + if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { + goto exit; + } + /* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */ + if (file.flags & kHFSHasLinkChainMask) { + cnid_t parent; + + parent = ((HFSPlusCatalogKey *)&iterator->key)->parentID; + + /* directory inodes don't have a chain (its in an EA) */ + if (parent == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + result = ENOLINK; /* signal to caller to get head of list */ + } else if (parent == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + *prevlinkid = 0; + *nextlinkid = file.hl_firstLinkID; + } else { + *prevlinkid = file.hl_prevLinkID; + *nextlinkid = file.hl_nextLinkID; + } + } else { + *prevlinkid = 0; + *nextlinkid = 0; + } +exit: + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + + +/* + * cat_lookup_lastlink - find the last sibling link in the chain (no "next" ptr) + */ +int +cat_lookup_lastlink(struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *lastlink, struct cat_desc *cdesc) +{ + FCB * fcb; + BTreeIterator * iterator; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile file; + int result = 0; + int itercount = 0; + int foundlast = 0; + cnid_t currentlink = linkfileid; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* Create an iterator for use by us temporarily */ + iterator = hfs_malloc(sizeof(*iterator)); + + while ((foundlast == 0) && (itercount < HFS_LINK_MAX )) { + itercount++; + bzero(iterator, sizeof(*iterator)); + + if ((result = getkey(hfsmp, currentlink, (CatalogKey *)&iterator->key))) { + goto exit; + } + BDINIT(btdata, &file); + + if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { + goto exit; + } + + /* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */ + if (file.flags & kHFSHasLinkChainMask) { + cnid_t parent; + + parent = ((HFSPlusCatalogKey *)&iterator->key)->parentID; + /* + * The raw inode for a directory hardlink doesn't have a chain. + * Its link information lives in an EA. + */ + if (parent == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + /* We don't iterate to find the oldest directory hardlink. 
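+				 * ENOLINK signals the caller to fetch the chain head from the
+				 * directory inode's extended attribute instead.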
*/ + result = ENOLINK; + goto exit; + } + else if (parent == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + /* Raw inode for file hardlink (the base inode) */ + currentlink = file.hl_firstLinkID; + + /* + * One minor special-casing here is necessary. + * If our ID brought us to the raw hardlink inode, and it does + * not have any siblings, then it's an open-unlinked file, and we + * should not proceed any further. + */ + if (currentlink == 0) { + result = ENOLINK; + goto exit; + } + } + else { + /* Otherwise, this item's parent is a legitimate directory in the namespace */ + if (file.hl_nextLinkID == 0) { + /* If nextLinkID is 0, then we found the end; no more hardlinks */ + foundlast = 1; + *lastlink = currentlink; + /* + * Since we had to construct a catalog key to do this lookup + * we still hold it in-hand. We might as well use it to build + * the descriptor that the caller asked for. + */ + builddesc ((HFSPlusCatalogKey*)&iterator->key, currentlink, 0, 0, 0, cdesc); + break; + } + + currentlink = file.hl_nextLinkID; + } + } + else { + /* Sorry, can't help you without a link chain */ + result = ENOLINK; + goto exit; + } + } +exit: + /* If we didn't find what we were looking for, zero out the args */ + if (foundlast == 0) { + if (cdesc) { + bzero (cdesc, sizeof(struct cat_desc)); + } + if (lastlink) { + *lastlink = 0; + } + } + + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + + +/* + * cat_createlink - create a link in the catalog + * + * The following cat_attr fields are expected to be set: + * ca_linkref + * ca_itime + * ca_mode (S_IFREG) + * ca_recflags + * ca_flags + * ca_finderinfo (type and creator) + */ +int +cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp, + cnid_t nextlinkid, cnid_t *linkfileid) +{ + FCB * fcb; + struct btobj * bto; + FSBufferDescriptor btdata; + HFSPlusForkData *rsrcforkp; + u_int32_t nextCNID; + u_int32_t datalen; + int thread_inserted = 0; + int alias_allocated = 0; + int result = 0; + int std_hfs; + + std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* + * Get the next CNID. Note that we are currently holding catalog lock. + */ + result = cat_acquire_cnid(hfsmp, &nextCNID); + if (result) { + return result; + } + + /* Get space for iterator, key and data */ + bto = hfs_malloc(sizeof(struct btobj)); + bto->iterator.hint.nodeNum = 0; + rsrcforkp = &bto->data.hfsPlusFile.resourceFork; + + result = buildkey(hfsmp, descp, &bto->key, 0); + if (result) { + printf("hfs: cat_createlink: err %d from buildkey\n", result); + goto exit; + } + + /* + * Insert the thread record first. + */ + datalen = buildthread((void*)&bto->key, &bto->data, 0, 0); + btdata.bufferAddress = &bto->data; + btdata.itemSize = datalen; + btdata.itemCount = 1; + + buildthreadkey(nextCNID, 0, (CatalogKey *) &bto->iterator.key); + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { + goto exit; + } + thread_inserted = 1; + + /* + * Now insert the link record. 
+ */ + buildrecord(attrp, nextCNID, 0, kTextEncodingMacUnicode, &bto->data, &datalen); + + bto->data.hfsPlusFile.hl_prevLinkID = 0; + bto->data.hfsPlusFile.hl_nextLinkID = nextlinkid; + bto->data.hfsPlusFile.hl_linkReference = attrp->ca_linkref; + + /* For directory hard links, create alias in resource fork */ + if (descp->cd_flags & CD_ISDIR) { + if ((result = cat_makealias(hfsmp, attrp->ca_linkref, &bto->data.hfsPlusFile))) { + goto exit; + } + alias_allocated = 1; + } + btdata.bufferAddress = &bto->data; + btdata.itemSize = datalen; + btdata.itemCount = 1; + + bcopy(&bto->key, &bto->iterator.key, sizeof(bto->key)); + + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { + if (result == btExists) + result = EEXIST; + goto exit; + } + if (linkfileid != NULL) { + *linkfileid = nextCNID; + } +exit: + if (result) { + if (thread_inserted) { + printf("hfs: cat_createlink: BTInsertRecord err=%d, vol=%s\n", MacToVFSError(result), hfsmp->vcbVN); + + buildthreadkey(nextCNID, 0, (CatalogKey *)&bto->iterator.key); + if (BTDeleteRecord(fcb, &bto->iterator)) { + printf("hfs: cat_createlink() failed to delete thread record on volume %s\n", hfsmp->vcbVN); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } + if (alias_allocated && rsrcforkp->extents[0].startBlock != 0) { + (void) BlockDeallocate(hfsmp, rsrcforkp->extents[0].startBlock, + rsrcforkp->extents[0].blockCount, 0); + rsrcforkp->extents[0].startBlock = 0; + rsrcforkp->extents[0].blockCount = 0; + } + } + (void) BTFlushPath(fcb); + hfs_free(bto, sizeof(*bto)); + + return MacToVFSError(result); +} + +/* Directory hard links are visible as aliases on pre-Leopard systems and + * as normal directories on Leopard or later. All directory hard link aliases + * have the same resource fork content except for the three uniquely + * identifying values that are updated in the resource fork data when the alias + * is created. The following array is the constant resource fork data used + * only for creating directory hard link aliases. 
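+ * (The three values are the volume create date, the folder ID of the
+ * private directory-hardlink parent, and the target directory inode's
+ * own ID; cat_makealias() writes them at the offsets defined in the
+ * enum below.)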
+ */ +static const char hfs_dirlink_alias_rsrc[] = { + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x9e, 0x00, 0x00, 0x00, 0x9e, 0x00, 0x00, 0x00, 0x32, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x2b, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x9e, 0x00, 0x00, 0x00, 0x9e, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x32, 0x00, 0x00, 0x61, 0x6c, 0x69, 0x73, + 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* Constants for directory hard link alias */ +enum { + /* Size of resource fork data array for directory hard link alias */ + kHFSAliasSize = 0x1d0, + + /* Volume type for ejectable devices like disk image */ + kHFSAliasVolTypeEjectable = 0x5, + + /* Offset for volume create date, in Mac OS local time */ + kHFSAliasVolCreateDateOffset = 0x12a, + + /* Offset for the type of volume */ + kHFSAliasVolTypeOffset = 0x130, + + /* Offset for folder ID of the parent directory of the directory inode */ + kHFSAliasParentIDOffset = 0x132, + + /* Offset for folder ID of the directory inode */ + kHFSAliasTargetIDOffset = 0x176, +}; + +/* Create and write an 
alias that points at the directory represented by given + * inode number on the same volume. Directory hard links are visible as + * aliases in pre-Leopard systems and this function creates these aliases. + * + * Note: This code is very specific to creating alias for the purpose + * of directory hard links only, and should not be generalized. + */ +static int +cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalogFile *crp) +{ + struct buf *bp; + daddr64_t blkno; + u_int32_t blkcount; + int blksize; + int sectorsize; + int result; + HFSPlusForkData *rsrcforkp; + char *alias; + uint32_t *valptr; + + rsrcforkp = &(crp->resourceFork); + + blksize = hfsmp->blockSize; + blkcount = howmany(kHFSAliasSize, blksize); + sectorsize = hfsmp->hfs_logical_block_size; + bzero(rsrcforkp, sizeof(HFSPlusForkData)); + + /* Allocate some disk space for the alias content. */ + result = BlockAllocate(hfsmp, 0, blkcount, blkcount, + HFS_ALLOC_FORCECONTIG | HFS_ALLOC_METAZONE, + &rsrcforkp->extents[0].startBlock, + &rsrcforkp->extents[0].blockCount); + /* Did it fail with an out of space error? If so, re-try and allow journal flushing. */ + if (result == dskFulErr ) { + result = BlockAllocate(hfsmp, 0, blkcount, blkcount, + HFS_ALLOC_FORCECONTIG | HFS_ALLOC_METAZONE | HFS_ALLOC_FLUSHTXN, + &rsrcforkp->extents[0].startBlock, + &rsrcforkp->extents[0].blockCount); + } + if (result) { + rsrcforkp->extents[0].startBlock = 0; + goto exit; + } + + /* Acquire a buffer cache block for our block. */ + blkno = ((u_int64_t)rsrcforkp->extents[0].startBlock * (u_int64_t)blksize) / sectorsize; + blkno += hfsmp->hfsPlusIOPosOffset / sectorsize; + + bp = buf_getblk(hfsmp->hfs_devvp, blkno, roundup(kHFSAliasSize, hfsmp->hfs_logical_block_size), 0, 0, BLK_META); + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); + } + + /* Generate alias content */ + alias = (char *)buf_dataptr(bp); + bzero(alias, buf_size(bp)); + bcopy(hfs_dirlink_alias_rsrc, alias, kHFSAliasSize); + + /* Set the volume create date, local time in Mac OS format */ + valptr = (uint32_t *)(alias + kHFSAliasVolCreateDateOffset); + *valptr = OSSwapHostToBigInt32(hfsmp->localCreateDate); + + /* If the file system is on a virtual device like disk image, + * update the volume type to be ejectable device. + */ + if (hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) { + *(uint16_t *)(alias + kHFSAliasVolTypeOffset) = + OSSwapHostToBigInt16(kHFSAliasVolTypeEjectable); + } + + /* Set id of the parent of the target directory */ + valptr = (uint32_t *)(alias + kHFSAliasParentIDOffset); + *valptr = OSSwapHostToBigInt32(hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid); + + /* Set id of the target directory */ + valptr = (uint32_t *)(alias + kHFSAliasTargetIDOffset); + *valptr = OSSwapHostToBigInt32(inode_num); + + /* Write alias content to disk. */ + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else if ((result = buf_bwrite(bp))) { + goto exit; + } + + /* Finish initializing the fork data. 
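+	 * logicalSize covers just the kHFSAliasSize bytes of alias content;
+	 * the fork keeps the full allocation block(s) reserved above as
+	 * totalBlocks.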
*/ + rsrcforkp->logicalSize = kHFSAliasSize; + rsrcforkp->totalBlocks = rsrcforkp->extents[0].blockCount; + +exit: + if (result && rsrcforkp->extents[0].startBlock != 0) { + (void) BlockDeallocate(hfsmp, rsrcforkp->extents[0].startBlock, rsrcforkp->extents[0].blockCount, 0); + rsrcforkp->extents[0].startBlock = 0; + rsrcforkp->extents[0].blockCount = 0; + rsrcforkp->logicalSize = 0; + rsrcforkp->totalBlocks = 0; + } + return (result); +} + +/* + * cat_deletelink - delete a link from the catalog + */ +int +cat_deletelink(struct hfsmount *hfsmp, struct cat_desc *descp) +{ + struct HFSPlusCatalogFile file; + struct cat_attr cattr; + uint32_t totalBlocks; + int i; + int result; + + bzero(&file, sizeof (file)); + bzero(&cattr, sizeof (cattr)); + cattr.ca_fileid = descp->cd_cnid; + + /* Directory links have alias content to remove. */ + if (descp->cd_flags & CD_ISDIR) { + FCB * fcb; + BTreeIterator * iterator; + struct FSBufferDescriptor btdata; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* Borrow the btcb iterator since we have an exclusive catalog lock. */ + iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator; + iterator->hint.nodeNum = 0; + + if ((result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0))) { + goto exit; + } + BDINIT(btdata, &file); + + if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { + goto exit; + } + } + + result = cat_delete(hfsmp, descp, &cattr); + + if ((result == 0) && + (descp->cd_flags & CD_ISDIR) && + (file.recordType == kHFSPlusFileRecord)) { + + totalBlocks = file.resourceFork.totalBlocks; + + for (i = 0; (i < 8) && (totalBlocks > 0); i++) { + if ((file.resourceFork.extents[i].blockCount == 0) && + (file.resourceFork.extents[i].startBlock == 0)) { + break; + } + + (void) BlockDeallocate(hfsmp, + file.resourceFork.extents[i].startBlock, + file.resourceFork.extents[i].blockCount, 0); + + totalBlocks -= file.resourceFork.extents[i].blockCount; + file.resourceFork.extents[i].startBlock = 0; + file.resourceFork.extents[i].blockCount = 0; + } + } +exit: + return (result); +} + + +/* + * Callback to collect directory entries. + * Called with readattr_state for each item in a directory. + */ +struct readattr_state { + struct hfsmount *hfsmp; + struct cat_entrylist *list; + cnid_t dir_cnid; + int stdhfs; + int error; + int reached_eof; +}; + +static int +getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, + struct readattr_state *state) +{ + struct cat_entrylist *list = state->list; + struct hfsmount *hfsmp = state->hfsmp; + struct cat_entry *cep; + cnid_t parentcnid; + + if (list->realentries >= list->maxentries) + return (0); /* stop */ + + parentcnid = state->stdhfs ? 
key->hfs.parentID : key->hfsPlus.parentID; + + switch(rec->recordType) { + case kHFSPlusFolderRecord: + case kHFSPlusFileRecord: +#if CONFIG_HFS_STD + case kHFSFolderRecord: + case kHFSFileRecord: +#endif + if (parentcnid != state->dir_cnid) { + state->error = ENOENT; + state->reached_eof = 1; + return (0); /* stop */ + } + break; + default: + state->error = ENOENT; + return (0); /* stop */ + } + + /* Hide the private system directories and journal files */ + if (parentcnid == kHFSRootFolderID) { + if (rec->recordType == kHFSPlusFolderRecord) { + if (rec->hfsPlusFolder.folderID == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + rec->hfsPlusFolder.folderID == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + list->skipentries++; + return (1); /* continue */ + } + } + if ((hfsmp->jnl || ((HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) && + (rec->recordType == kHFSPlusFileRecord) && + ((rec->hfsPlusFile.fileID == hfsmp->hfs_jnlfileid) || + (rec->hfsPlusFile.fileID == hfsmp->hfs_jnlinfoblkid))) { + list->skipentries++; + return (1); /* continue */ + } + } + + cep = &list->entry[list->realentries++]; + + if (state->stdhfs == 0) { + getbsdattr(hfsmp, (const struct HFSPlusCatalogFile *)rec, &cep->ce_attr); + builddesc((const HFSPlusCatalogKey *)key, getcnid(rec), 0, getencoding(rec), + isadir(rec), &cep->ce_desc); + + if (rec->recordType == kHFSPlusFileRecord) { + cep->ce_datasize = rec->hfsPlusFile.dataFork.logicalSize; + cep->ce_datablks = rec->hfsPlusFile.dataFork.totalBlocks; + cep->ce_rsrcsize = rec->hfsPlusFile.resourceFork.logicalSize; + cep->ce_rsrcblks = rec->hfsPlusFile.resourceFork.totalBlocks; + + /* Save link reference for later processing. */ + if ((SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator)) { + cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; + } else if ((rec->hfsPlusFile.flags & kHFSHasLinkChainMask) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator)) { + cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; + } + } + } +#if CONFIG_HFS_STD + else { + struct HFSPlusCatalogFile cnoderec; + HFSPlusCatalogKey * pluskey; + u_int32_t encoding; + + promoteattr(hfsmp, rec, &cnoderec); + getbsdattr(hfsmp, &cnoderec, &cep->ce_attr); + + pluskey = hfs_malloc(sizeof(HFSPlusCatalogKey)); + promotekey(hfsmp, (const HFSCatalogKey *)key, pluskey, &encoding); + builddesc(pluskey, getcnid(rec), 0, encoding, isadir(rec), &cep->ce_desc); + hfs_free(pluskey, sizeof(*pluskey)); + + if (rec->recordType == kHFSFileRecord) { + int blksize = HFSTOVCB(hfsmp)->blockSize; + + cep->ce_datasize = rec->hfsFile.dataLogicalSize; + cep->ce_datablks = rec->hfsFile.dataPhysicalSize / blksize; + cep->ce_rsrcsize = rec->hfsFile.rsrcLogicalSize; + cep->ce_rsrcblks = rec->hfsFile.rsrcPhysicalSize / blksize; + } + } +#endif + + return (list->realentries < list->maxentries); +} + +/* + * Pack a cat_entrylist buffer with attributes from the catalog + * + * Note: index is zero relative + */ +int +cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_entrylist *ce_list, int *reachedeof) +{ + FCB* fcb; + CatalogKey * key; + BTreeIterator * iterator; + struct readattr_state state; + cnid_t parentcnid; + int i; + int std_hfs; + int index; + int have_key; + int result = 0; + int reached_eof = 0; + + ce_list->realentries = 0; + + fcb 
= GetFileControlBlock(HFSTOVCB(hfsmp)->catalogRefNum); + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + parentcnid = dirhint->dh_desc.cd_parentcnid; + + bzero (&state, sizeof(struct readattr_state)); + + state.hfsmp = hfsmp; + state.list = ce_list; + state.dir_cnid = parentcnid; + state.stdhfs = std_hfs; + state.error = 0; + + iterator = hfs_mallocz(sizeof(*iterator)); + key = (CatalogKey *)&iterator->key; + have_key = 0; + iterator->hint.nodeNum = dirhint->dh_desc.cd_hint; + index = dirhint->dh_index + 1; + + /* + * Attempt to build a key from cached filename + */ + if (dirhint->dh_desc.cd_namelen != 0) { + if (buildkey(hfsmp, &dirhint->dh_desc, (HFSPlusCatalogKey *)key, 0) == 0) { + have_key = 1; + } + } + + /* + * If the last entry wasn't cached then position the btree iterator + */ + if ((index == 0) || !have_key) { + /* + * Position the iterator at the directory's thread record. + * (i.e. just before the first entry) + */ + buildthreadkey(dirhint->dh_desc.cd_parentcnid, (hfsmp->hfs_flags & HFS_STANDARD), key); + result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + if (result) { + result = MacToVFSError(result); + goto exit; + } + + /* + * Iterate until we reach the entry just + * before the one we want to start with. + */ + if (index > 0) { + struct position_state ps; + + ps.error = 0; + ps.count = 0; + ps.index = index; + ps.parentID = dirhint->dh_desc.cd_parentcnid; + ps.hfsmp = hfsmp; + + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)cat_findposition, &ps); + if (ps.error) + result = ps.error; + else + result = MacToVFSError(result); + + if (result) { + /* + * Note: the index may now point to EOF if the directory + * was modified in between system calls. We will return + * ENOENT from cat_findposition if this is the case, and + * when we bail out with an error, our caller (hfs_readdirattr_internal) + * will suppress the error and indicate EOF to its caller. + */ + result = MacToVFSError(result); + goto exit; + } + } + } + + /* Fill list with entries starting at iterator->key. */ + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)getentriesattr_callback, &state); + + if (state.error) { + result = state.error; + reached_eof = state.reached_eof; + } + else if (ce_list->realentries == 0) { + result = ENOENT; + reached_eof = 1; + } + else { + result = MacToVFSError(result); + } + + if (std_hfs) + goto exit; + + /* + * Resolve any hard links. + */ + for (i = 0; i < (int)ce_list->realentries; ++i) { + struct FndrFileInfo *fip; + struct cat_entry *cep; + struct HFSPlusCatalogFile filerec; + int isdirlink = 0; + int isfilelink = 0; + + cep = &ce_list->entry[i]; + if (cep->ce_attr.ca_linkref == 0) + continue; + + /* Note: Finder info is still in Big Endian */ + fip = (struct FndrFileInfo *)&cep->ce_attr.ca_finderinfo; + + if (S_ISREG(cep->ce_attr.ca_mode) && + (SWAP_BE32(fip->fdType) == kHardLinkFileType) && + (SWAP_BE32(fip->fdCreator) == kHFSPlusCreator)) { + isfilelink = 1; + } + if (S_ISREG(cep->ce_attr.ca_mode) && + (SWAP_BE32(fip->fdType) == kHFSAliasType) && + (SWAP_BE32(fip->fdCreator) == kHFSAliasCreator) && + (cep->ce_attr.ca_recflags & kHFSHasLinkChainMask)) { + isdirlink = 1; + } + if (isfilelink || isdirlink) { + if (cat_resolvelink(hfsmp, cep->ce_attr.ca_linkref, isdirlink, &filerec) != 0) + continue; + /* Repack entry from inode record. 
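+			 * Only the attributes and fork sizes are replaced from the
+			 * resolved inode; the name and descriptor already built from the
+			 * link's own catalog key are left intact.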
*/ + getbsdattr(hfsmp, &filerec, &cep->ce_attr); + cep->ce_datasize = filerec.dataFork.logicalSize; + cep->ce_datablks = filerec.dataFork.totalBlocks; + cep->ce_rsrcsize = filerec.resourceFork.logicalSize; + cep->ce_rsrcblks = filerec.resourceFork.totalBlocks; + } + } + +exit: + hfs_free(iterator, sizeof(*iterator)); + *reachedeof = reached_eof; + return MacToVFSError(result); +} + +#define SMALL_DIRENTRY_SIZE (int)(sizeof(struct dirent) - (MAXNAMLEN + 1) + 8) + +/* + * Callback to pack directory entries. + * Called with packdirentry_state for each item in a directory. + */ + +/* Hard link information collected during cat_getdirentries. */ +struct linkinfo { + u_int32_t link_ref; + user_addr_t dirent_addr; +}; +typedef struct linkinfo linkinfo_t; + +/* State information for the getdirentries_callback function. */ +struct packdirentry_state { + int cbs_flags; /* VNODE_READDIR_* flags */ + u_int32_t cbs_parentID; + u_int32_t cbs_index; + uio_t cbs_uio; + ExtendedVCB * cbs_hfsmp; + int cbs_result; + int32_t cbs_nlinks; + int32_t cbs_maxlinks; + linkinfo_t * cbs_linkinfo; + struct cat_desc * cbs_desc; + u_int8_t * cbs_namebuf; + /* + * The following fields are only used for NFS readdir, which + * uses the next file id as the seek offset of each entry. + */ + struct direntry * cbs_direntry; + struct direntry * cbs_prevdirentry; + u_int32_t cbs_previlinkref; + Boolean cbs_hasprevdirentry; + Boolean cbs_eof; +}; + +/* + * getdirentries callback for HFS Plus directories. + */ +static int +getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, + struct packdirentry_state *state) +{ + struct hfsmount *hfsmp; + const CatalogName *cnp; + cnid_t curID; + OSErr result; + struct dirent catent; + struct direntry * entry = NULL; + time_t itime; + u_int32_t ilinkref = 0; + u_int32_t curlinkref = 0; + cnid_t cnid; + int hide = 0; + u_int8_t type = DT_UNKNOWN; + u_int8_t is_mangled = 0; + u_int8_t is_link = 0; + u_int8_t *nameptr; + user_addr_t uiobase = USER_ADDR_NULL; + size_t namelen = 0; + size_t maxnamelen; + size_t uiosize = 0; + caddr_t uioaddr; + Boolean stop_after_pack = false; + + hfsmp = state->cbs_hfsmp; + curID = ckp->hfsPlus.parentID; + + /* We're done when parent directory changes */ + if (state->cbs_parentID != curID) { + /* + * If the parent ID is different from curID this means we've hit + * the EOF for the directory. To help future callers, we mark + * the cbs_eof boolean. However, we should only mark the EOF + * boolean if we're about to return from this function. + * + * This is because this callback function does its own uiomove + * to get the data to userspace. If we set the boolean before determining + * whether or not the current entry has enough room to write its + * data to userland, we could fool the callers of this catalog function + * into thinking they've hit EOF earlier than they really would have. + * In that case, we'd know that we have more entries to process and + * send to userland, but we didn't have enough room. + * + * To be safe, we mark cbs_eof here ONLY for the cases where we know we're + * about to return and won't write any new data back + * to userland. In the stop_after_pack case, we'll set this boolean + * regardless, so it's slightly safer to let that logic mark the boolean, + * especially since it's closer to the return of this function. 
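+	 *
+	 * In short: for extended (NFS-style) calls that still have a buffered
+	 * previous entry we fall through with stop_after_pack set so that last
+	 * entry can be emitted; in every other case we flag EOF, note ENOENT,
+	 * and stop right here.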
+ */ + + if (state->cbs_flags & VNODE_READDIR_EXTENDED) { + /* The last record has not been returned yet, so we + * want to stop after packing the last item + */ + if (state->cbs_hasprevdirentry) { + stop_after_pack = true; + } else { + state->cbs_eof = true; + state->cbs_result = ENOENT; + return (0); /* stop */ + } + } else { + state->cbs_eof = true; + state->cbs_result = ENOENT; + return (0); /* stop */ + } + } + + if (state->cbs_flags & VNODE_READDIR_EXTENDED) { + entry = state->cbs_direntry; + nameptr = (u_int8_t *)&entry->d_name[0]; + if (state->cbs_flags & VNODE_READDIR_NAMEMAX) { + /* + * The NFS server sometimes needs to make filenames fit in + * NAME_MAX bytes (since its client may not be able to + * handle a longer name). In that case, NFS will ask us + * to mangle the name to keep it short enough. + */ + maxnamelen = NAME_MAX + 1; + } else { + maxnamelen = sizeof(entry->d_name); + } + } else { + nameptr = (u_int8_t *)&catent.d_name[0]; + maxnamelen = sizeof(catent.d_name); + } + + if ((state->cbs_flags & VNODE_READDIR_EXTENDED) && stop_after_pack) { + /* The last item returns a non-zero invalid cookie */ + cnid = INT_MAX; + } else { + switch(crp->recordType) { + case kHFSPlusFolderRecord: + type = DT_DIR; + cnid = crp->hfsPlusFolder.folderID; + /* Hide our private system directories. */ + if (curID == kHFSRootFolderID) { + if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + hide = 1; + } + } + break; + case kHFSPlusFileRecord: + itime = to_bsd_time(crp->hfsPlusFile.createDate); + type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode); + cnid = crp->hfsPlusFile.fileID; + /* + * When a hardlink link is encountered save its link ref. + */ + if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) && + ((itime == (time_t)hfsmp->hfs_itime) || + (itime == (time_t)hfsmp->hfs_metadata_createdate))) { + /* If link ref is inode's file id then use it directly. */ + if (crp->hfsPlusFile.flags & kHFSHasLinkChainMask) { + cnid = crp->hfsPlusFile.hl_linkReference; + } else { + ilinkref = crp->hfsPlusFile.hl_linkReference; + } + is_link =1; + } else if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator) && + (crp->hfsPlusFile.flags & kHFSHasLinkChainMask) && + (crp->hfsPlusFile.hl_linkReference >= kHFSFirstUserCatalogNodeID) && + ((itime == (time_t)hfsmp->hfs_itime) || + (itime == (time_t)hfsmp->hfs_metadata_createdate))) { + /* A directory's link resolves to a directory. */ + type = DT_DIR; + /* A directory's link ref is always inode's file id. */ + cnid = crp->hfsPlusFile.hl_linkReference; + is_link = 1; + } + /* Hide the journal files */ + if ((curID == kHFSRootFolderID) && + ((hfsmp->jnl || ((HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY)))) && + ((cnid == hfsmp->hfs_jnlfileid) || + (cnid == hfsmp->hfs_jnlinfoblkid))) { + hide = 1; + } + break; + default: + return (0); /* stop */ + }; + + cnp = (const CatalogName*) &ckp->hfsPlus.nodeName; + + namelen = cnp->ustr.length; + /* + * For MacRoman encoded names (textEncoding == 0), assume that it's ascii + * and convert it directly in an attempt to avoid the more + * expensive utf8_encodestr conversion. 
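+		 * For example, an all-ASCII name such as "Read Me" is copied one
+		 * code unit at a time (with '/' mapped to ':'), while any character
+		 * above U+007F, or an embedded NUL, sends us to the utf8_encodestr
+		 * path below.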
+ */ + if ((namelen < maxnamelen) && (crp->hfsPlusFile.textEncoding == 0)) { + int i; + u_int16_t ch; + const u_int16_t *chp; + + chp = &cnp->ustr.unicode[0]; + for (i = 0; i < (int)namelen; ++i) { + ch = *chp++; + if (ch > 0x007f || ch == 0x0000) { + /* Perform expensive utf8_encodestr conversion */ + goto encodestr; + } + nameptr[i] = (ch == '/') ? ':' : (u_int8_t)ch; + } + nameptr[namelen] = '\0'; + result = 0; + } else { +encodestr: + result = utf8_encodestr(cnp->ustr.unicode, namelen * sizeof(UniChar), + nameptr, &namelen, maxnamelen, ':', 0); + } + + /* Check result returned from encoding the filename to utf8 */ + if (result == ENAMETOOLONG) { + /* + * If we were looking at a catalog record for a hardlink (not the inode), + * then we want to use its link ID as opposed to the inode ID for + * a mangled name. For all other cases, they are the same. Note that + * due to the way directory hardlinks are implemented, the actual link + * is going to be counted as a file record, so we can catch both + * with is_link. + */ + cnid_t linkid = cnid; + if (is_link) { + linkid = crp->hfsPlusFile.fileID; + } + + result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), + cnp->ustr.unicode, maxnamelen, + (ByteCount*)&namelen, nameptr, linkid); + is_mangled = 1; + } + } + + if (state->cbs_flags & VNODE_READDIR_EXTENDED) { + /* + * The index is 1 relative and includes "." and ".." + * + * Also stuff the cnid in the upper 32 bits of the cookie. + * The cookie is stored to the previous entry, which will + * be packed and copied this time + */ + state->cbs_prevdirentry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32); + uiosize = state->cbs_prevdirentry->d_reclen; + uioaddr = (caddr_t) state->cbs_prevdirentry; + } else { + catent.d_type = type; + catent.d_namlen = namelen; + catent.d_reclen = uiosize = STD_DIRENT_LEN(namelen); + if (hide) + catent.d_fileno = 0; /* file number = 0 means skip entry */ + else + catent.d_fileno = cnid; + uioaddr = (caddr_t) &catent; + } + + /* Save current base address for post processing of hard-links. 
*/ + if (ilinkref || state->cbs_previlinkref) { + uiobase = uio_curriovbase(state->cbs_uio); + } + /* If this entry won't fit then we're done */ + if ((uiosize > (user_size_t)uio_resid(state->cbs_uio)) || + (ilinkref != 0 && state->cbs_nlinks == state->cbs_maxlinks)) { + return (0); /* stop */ + } + + if (!(state->cbs_flags & VNODE_READDIR_EXTENDED) || state->cbs_hasprevdirentry) { + state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio); + if (state->cbs_result == 0) { + ++state->cbs_index; + + /* Remember previous entry */ + state->cbs_desc->cd_cnid = cnid; + if (type == DT_DIR) { + state->cbs_desc->cd_flags |= CD_ISDIR; + } else { + state->cbs_desc->cd_flags &= ~CD_ISDIR; + } + if (state->cbs_desc->cd_nameptr != NULL) { + state->cbs_desc->cd_namelen = 0; + } +#if 0 + state->cbs_desc->cd_encoding = xxxx; +#endif + if (!is_mangled) { + state->cbs_desc->cd_namelen = namelen; + bcopy(nameptr, state->cbs_namebuf, namelen + 1); + } else { + /* Store unmangled name for the directory hint else it will + * restart readdir at the last location again + */ + u_int8_t *new_nameptr; + size_t bufsize; + size_t tmp_namelen = 0; + + cnp = (const CatalogName *)&ckp->hfsPlus.nodeName; + bufsize = 1 + utf8_encodelen(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + ':', 0); + new_nameptr = hfs_malloc(bufsize); + result = utf8_encodestr(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + new_nameptr, &tmp_namelen, bufsize, ':', 0); + + state->cbs_desc->cd_namelen = tmp_namelen; + bcopy(new_nameptr, state->cbs_namebuf, tmp_namelen + 1); + + hfs_free(new_nameptr, bufsize); + } + } + if (state->cbs_hasprevdirentry) { + curlinkref = ilinkref; /* save current */ + ilinkref = state->cbs_previlinkref; /* use previous */ + } + /* + * Record any hard links for post processing. + */ + if ((ilinkref != 0) && + (state->cbs_result == 0) && + (state->cbs_nlinks < state->cbs_maxlinks)) { + state->cbs_linkinfo[state->cbs_nlinks].dirent_addr = uiobase; + state->cbs_linkinfo[state->cbs_nlinks].link_ref = ilinkref; + state->cbs_nlinks++; + } + if (state->cbs_hasprevdirentry) { + ilinkref = curlinkref; /* restore current */ + } + } + + /* Fill the direntry to be used the next time */ + if (state->cbs_flags & VNODE_READDIR_EXTENDED) { + if (stop_after_pack) { + state->cbs_eof = true; + return (0); /* stop */ + } + entry->d_type = type; + entry->d_namlen = namelen; + entry->d_reclen = EXT_DIRENT_LEN(namelen); + if (hide) { + /* File number = 0 means skip entry */ + entry->d_fileno = 0; + } else { + entry->d_fileno = cnid; + } + /* swap the current and previous entry */ + struct direntry * tmp; + tmp = state->cbs_direntry; + state->cbs_direntry = state->cbs_prevdirentry; + state->cbs_prevdirentry = tmp; + state->cbs_hasprevdirentry = true; + state->cbs_previlinkref = ilinkref; + } + + /* Continue iteration if there's room */ + return (state->cbs_result == 0 && + uio_resid(state->cbs_uio) >= SMALL_DIRENTRY_SIZE); +} + +#if CONFIG_HFS_STD +/* + * getdirentries callback for standard HFS (non HFS+) directories. 
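+ * Unlike the HFS Plus callback above, this path never sees hard links
+ * or extended (NFS) direntries, so it needs no previous-entry buffering
+ * and no link post-processing.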
+ */ +static int +getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp, + struct packdirentry_state *state) +{ + struct hfsmount *hfsmp; + const CatalogName *cnp; + cnid_t curID; + OSErr result; + struct dirent catent; + cnid_t cnid; + u_int8_t type = DT_UNKNOWN; + u_int8_t *nameptr; + size_t namelen = 0; + size_t maxnamelen; + size_t uiosize = 0; + caddr_t uioaddr; + + hfsmp = state->cbs_hfsmp; + + curID = ckp->hfs.parentID; + + /* We're done when parent directory changes */ + if (state->cbs_parentID != curID) { + state->cbs_result = ENOENT; + return (0); /* stop */ + } + + nameptr = (u_int8_t *)&catent.d_name[0]; + maxnamelen = sizeof(catent.d_name); + + switch(crp->recordType) { + case kHFSFolderRecord: + type = DT_DIR; + cnid = crp->hfsFolder.folderID; + break; + case kHFSFileRecord: + type = DT_REG; + cnid = crp->hfsFile.fileID; + break; + default: + return (0); /* stop */ + }; + + cnp = (const CatalogName*) ckp->hfs.nodeName; + result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen, (ByteCount *)&namelen, nameptr); + /* + * When an HFS name cannot be encoded with the current + * volume encoding we use MacRoman as a fallback. + */ + if (result) { + result = mac_roman_to_utf8(cnp->pstr, maxnamelen, (ByteCount *)&namelen, nameptr); + } + catent.d_type = type; + catent.d_namlen = namelen; + catent.d_reclen = uiosize = STD_DIRENT_LEN(namelen); + catent.d_fileno = cnid; + uioaddr = (caddr_t) &catent; + + /* If this entry won't fit then we're done */ + if (uiosize > (user_size_t)uio_resid(state->cbs_uio)) { + return (0); /* stop */ + } + + state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio); + if (state->cbs_result == 0) { + ++state->cbs_index; + + /* Remember previous entry */ + state->cbs_desc->cd_cnid = cnid; + if (type == DT_DIR) { + state->cbs_desc->cd_flags |= CD_ISDIR; + } else { + state->cbs_desc->cd_flags &= ~CD_ISDIR; + } + if (state->cbs_desc->cd_nameptr != NULL) { + state->cbs_desc->cd_namelen = 0; + } + state->cbs_desc->cd_namelen = namelen; + bcopy(nameptr, state->cbs_namebuf, namelen + 1); + } + + /* Continue iteration if there's room */ + return (state->cbs_result == 0 && uio_resid(state->cbs_uio) >= SMALL_DIRENTRY_SIZE); +} +#endif + +/* + * Pack a uio buffer with directory entries from the catalog + */ +int +cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *dirhint, + uio_t uio, int flags, int * items, int * eofflag) +{ + FCB* fcb; + BTreeIterator * iterator; + CatalogKey * key; + struct packdirentry_state state; + void * buffer; + int bufsize; + int maxlinks; + int result; + int index; + int have_key; + int extended; + + extended = flags & VNODE_READDIR_EXTENDED; + + if (extended && (hfsmp->hfs_flags & HFS_STANDARD)) { + return (ENOTSUP); + } + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + #define MAX_LINKINFO_ENTRIES 275 + /* + * Get a buffer for link info array, btree iterator and a direntry. + * + * We impose an cap of 275 link entries when trying to compute + * the total number of hardlink entries that we'll allow in the + * linkinfo array, as this has been shown to noticeably impact performance. + * + * Note that in the case where there are very few hardlinks, + * this does not restrict or prevent us from vending out as many entries + * as we can to the uio_resid, because the getdirentries callback + * uiomoves the directory entries to the uio itself and does not use + * this MALLOC'd array. It also limits itself to maxlinks of hardlinks. 
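+	 *
+	 * As a rough illustration (assuming an LP64 kernel, where linkinfo_t
+	 * pads out to 16 bytes), the full 275-entry array adds about 4.3KB on
+	 * top of the MAXPATHLEN name buffer and the B-tree iterator that share
+	 * this allocation.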
+ */ + + // This value cannot underflow: both entrycnt and the rhs are unsigned 32-bit + // ints, so the worst-case MIN of them is 0. + maxlinks = MIN (entrycnt, (u_int32_t)(uio_resid(uio) / SMALL_DIRENTRY_SIZE)); + // Prevent overflow. + maxlinks = MIN (maxlinks, MAX_LINKINFO_ENTRIES); + bufsize = MAXPATHLEN + (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator); + + if (extended) { + bufsize += 2*sizeof(struct direntry); + } + buffer = hfs_mallocz(bufsize); + + state.cbs_flags = flags; + state.cbs_hasprevdirentry = false; + state.cbs_previlinkref = 0; + state.cbs_nlinks = 0; + state.cbs_maxlinks = maxlinks; + state.cbs_linkinfo = (linkinfo_t *)((char *)buffer + MAXPATHLEN); + /* + * We need to set cbs_eof to false regardless of whether or not the + * control flow is actually in the extended case, since we use this + * field to track whether or not we've returned EOF from the iterator function. + */ + state.cbs_eof = false; + + iterator = (BTreeIterator *) ((char *)state.cbs_linkinfo + (maxlinks * sizeof(linkinfo_t))); + key = (CatalogKey *)&iterator->key; + have_key = 0; + index = dirhint->dh_index + 1; + if (extended) { + state.cbs_direntry = (struct direntry *)((char *)iterator + sizeof(BTreeIterator)); + state.cbs_prevdirentry = state.cbs_direntry + 1; + } + /* + * Attempt to build a key from cached filename + */ + if (dirhint->dh_desc.cd_namelen != 0) { + if (buildkey(hfsmp, &dirhint->dh_desc, (HFSPlusCatalogKey *)key, 0) == 0) { + iterator->hint.nodeNum = dirhint->dh_desc.cd_hint; + have_key = 1; + } + } + + if (index == 0 && dirhint->dh_threadhint != 0) { + /* + * Position the iterator at the directory's thread record. + * (i.e. just before the first entry) + */ + buildthreadkey(dirhint->dh_desc.cd_parentcnid, (hfsmp->hfs_flags & HFS_STANDARD), key); + iterator->hint.nodeNum = dirhint->dh_threadhint; + iterator->hint.index = 0; + have_key = 1; + } + + /* + * If the last entry wasn't cached then position the btree iterator + */ + if (!have_key) { + /* + * Position the iterator at the directory's thread record. + * (i.e. just before the first entry) + */ + buildthreadkey(dirhint->dh_desc.cd_parentcnid, (hfsmp->hfs_flags & HFS_STANDARD), key); + result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + if (result) { + result = MacToVFSError(result); + goto cleanup; + } + if (index == 0) { + dirhint->dh_threadhint = iterator->hint.nodeNum; + } + /* + * Iterate until we reach the entry just + * before the one we want to start with. + */ + if (index > 0) { + struct position_state ps; + + ps.error = 0; + ps.count = 0; + ps.index = index; + ps.parentID = dirhint->dh_desc.cd_parentcnid; + ps.hfsmp = hfsmp; + + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)cat_findposition, &ps); + if (ps.error) + result = ps.error; + else + result = MacToVFSError(result); + if (result) { + result = MacToVFSError(result); + if (result == ENOENT) { + /* + * ENOENT means we've hit the EOF. + * suppress the error, and set the eof flag. + */ + result = 0; + dirhint->dh_desc.cd_flags |= CD_EOF; + *eofflag = 1; + } + goto cleanup; + } + } + } + + state.cbs_index = index; + state.cbs_hfsmp = hfsmp; + state.cbs_uio = uio; + state.cbs_desc = &dirhint->dh_desc; + state.cbs_namebuf = (u_int8_t *)buffer; + state.cbs_result = 0; + state.cbs_parentID = dirhint->dh_desc.cd_parentcnid; + + /* Use a temporary buffer to hold intermediate descriptor names. 
*/ + if (dirhint->dh_desc.cd_namelen > 0 && dirhint->dh_desc.cd_nameptr != NULL) { + bcopy(dirhint->dh_desc.cd_nameptr, buffer, dirhint->dh_desc.cd_namelen+1); + if (dirhint->dh_desc.cd_flags & CD_HASBUF) { + dirhint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)dirhint->dh_desc.cd_nameptr); + } + } + dirhint->dh_desc.cd_nameptr = (u_int8_t *)buffer; + + enum BTreeIterationOperations op; + if (extended && index != 0 && have_key) + op = kBTreeCurrentRecord; + else + op = kBTreeNextRecord; + + /* + * Process as many entries as possible starting at iterator->key. + */ + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ */ + result = BTIterateRecords(fcb, op, iterator, + (IterateCallBackProcPtr)getdirentries_callback, &state); + + /* For extended calls, every call to getdirentries_callback() + * transfers the previous directory entry found to the user + * buffer. Therefore when BTIterateRecords reaches the end of + * Catalog BTree, call getdirentries_callback() again with + * dummy values to copy the last directory entry stored in + * packdirentry_state + */ + if (extended && (result == fsBTRecordNotFoundErr)) { + CatalogKey ckp; + CatalogRecord crp; + + bzero(&ckp, sizeof(ckp)); + bzero(&crp, sizeof(crp)); + + result = getdirentries_callback(&ckp, &crp, &state); + } + } +#if CONFIG_HFS_STD + else { + /* HFS (standard) */ + result = BTIterateRecords(fcb, op, iterator, + (IterateCallBackProcPtr)getdirentries_std_callback, &state); + } +#endif + + /* Note that state.cbs_index is still valid on errors */ + *items = state.cbs_index - index; + index = state.cbs_index; + + /* + * Also note that cbs_eof is set in all cases if we ever hit EOF + * during the enumeration by the catalog callback. Mark the directory's hint + * descriptor as having hit EOF. + */ + + if (state.cbs_eof) { + dirhint->dh_desc.cd_flags |= CD_EOF; + *eofflag = 1; + } + + /* Finish updating the catalog iterator. */ + dirhint->dh_desc.cd_hint = iterator->hint.nodeNum; + dirhint->dh_desc.cd_flags |= CD_DECOMPOSED; + dirhint->dh_index = index - 1; + + /* Fix up the name. */ + if (dirhint->dh_desc.cd_namelen > 0) { + dirhint->dh_desc.cd_nameptr = (const u_int8_t *)vfs_addname((char *)buffer, dirhint->dh_desc.cd_namelen, 0, 0); + dirhint->dh_desc.cd_flags |= CD_HASBUF; + } else { + dirhint->dh_desc.cd_nameptr = NULL; + dirhint->dh_desc.cd_namelen = 0; + } + + /* + * Post process any hard links to get the real file id. + */ + if (state.cbs_nlinks > 0) { + ino_t fileid = 0; + user_addr_t address; + int i; + + for (i = 0; i < state.cbs_nlinks; ++i) { + if (resolvelinkid(hfsmp, state.cbs_linkinfo[i].link_ref, &fileid) != 0) + continue; + /* This assumes that d_ino is always first field. */ + address = state.cbs_linkinfo[i].dirent_addr; + if (address == (user_addr_t)0) + continue; + if (uio_isuserspace(uio)) { + if (extended) { + ino64_t fileid_64 = (ino64_t)fileid; + (void) copyout(&fileid_64, address, sizeof(fileid_64)); + } else { + (void) copyout(&fileid, address, sizeof(fileid)); + } + } else /* system space */ { + if (extended) { + ino64_t fileid_64 = (ino64_t)fileid; + bcopy(&fileid_64, (void*) CAST_DOWN(caddr_t, address), sizeof(fileid_64)); + } else { + bcopy(&fileid, (void*) CAST_DOWN(caddr_t, address), sizeof(fileid)); + } + } + } + } + + if (state.cbs_result) + result = state.cbs_result; + else + result = MacToVFSError(result); + + if (result == ENOENT) { + result = 0; + } + +cleanup: + hfs_free(buffer, bufsize); + + return (result); +} + + +/* + * Callback to establish directory position. 
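+ * (Typically used when an enumeration resumes at a non-zero index and
+ * no cached name was available to rebuild the key directly.)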
+ * Called with position_state for each item in a directory. + */ +static int +cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, + struct position_state *state) +{ + cnid_t curID = 0; + + if ((state->hfsmp->hfs_flags & HFS_STANDARD) == 0) { + curID = ckp->hfsPlus.parentID; + } +#if CONFIG_HFS_STD + else { + curID = ckp->hfs.parentID; + } +#endif + + /* Make sure parent directory didn't change */ + if (state->parentID != curID) { + /* + * The parent ID is different from curID this means we've hit + * the EOF for the directory. + */ + state->error = ENOENT; + return (0); /* stop */ + } + + /* Count this entry */ + switch(crp->recordType) { + case kHFSPlusFolderRecord: + case kHFSPlusFileRecord: +#if CONFIG_HFS_STD + case kHFSFolderRecord: + case kHFSFileRecord: +#endif + ++state->count; + break; + default: + printf("hfs: cat_findposition: invalid record type %d in dir %d\n", + crp->recordType, curID); + state->error = EINVAL; + return (0); /* stop */ + }; + + return (state->count < state->index); +} + + +/* + * cat_binarykeycompare - compare two HFS Plus catalog keys. + + * The name portion of the key is compared using a 16-bit binary comparison. + * This is called from the b-tree code. + */ +int +cat_binarykeycompare(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) +{ + u_int32_t searchParentID, trialParentID; + int result; + + searchParentID = searchKey->parentID; + trialParentID = trialKey->parentID; + result = 0; + + if (searchParentID > trialParentID) { + ++result; + } else if (searchParentID < trialParentID) { + --result; + } else { + u_int16_t * str1 = &searchKey->nodeName.unicode[0]; + u_int16_t * str2 = &trialKey->nodeName.unicode[0]; + int length1 = searchKey->nodeName.length; + int length2 = trialKey->nodeName.length; + + result = UnicodeBinaryCompare (str1, length1, str2, length2); + } + + return result; +} + + +#if CONFIG_HFS_STD +/* + * Compare two standard HFS catalog keys + * + * Result: +n search key > trial key + * 0 search key = trial key + * -n search key < trial key + */ +int +CompareCatalogKeys(HFSCatalogKey *searchKey, HFSCatalogKey *trialKey) +{ + cnid_t searchParentID, trialParentID; + int result; + + searchParentID = searchKey->parentID; + trialParentID = trialKey->parentID; + + if (searchParentID > trialParentID) + result = 1; + else if (searchParentID < trialParentID) + result = -1; + else /* parent dirID's are equal, compare names */ + result = FastRelString(searchKey->nodeName, trialKey->nodeName); + + return result; +} +#endif + + +/* + * Compare two HFS+ catalog keys + * + * Result: +n search key > trial key + * 0 search key = trial key + * -n search key < trial key + */ +int +CompareExtendedCatalogKeys(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) +{ + cnid_t searchParentID, trialParentID; + int result; + + searchParentID = searchKey->parentID; + trialParentID = trialKey->parentID; + + if (searchParentID > trialParentID) { + result = 1; + } + else if (searchParentID < trialParentID) { + result = -1; + } else { + /* parent node ID's are equal, compare names */ + if ( searchKey->nodeName.length == 0 || trialKey->nodeName.length == 0 ) + result = searchKey->nodeName.length - trialKey->nodeName.length; + else + result = FastUnicodeCompare(&searchKey->nodeName.unicode[0], + searchKey->nodeName.length, + &trialKey->nodeName.unicode[0], + trialKey->nodeName.length); + } + + return result; +} + + +/* + * buildkey - build a Catalog b-tree key from a cnode descriptor + */ +static int +buildkey(struct hfsmount *hfsmp, struct 
cat_desc *descp, + HFSPlusCatalogKey *key, int retry) +{ + int std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); + int utf8_flags = UTF_ESCAPE_ILLEGAL; + int result = 0; + size_t unicodeBytes = 0; + + if (std_hfs == 0) { + retry = 0; + } + + if (descp->cd_namelen == 0 || descp->cd_nameptr[0] == '\0') + return (EINVAL); /* invalid name */ + + key->parentID = descp->cd_parentcnid; + key->nodeName.length = 0; + /* + * Convert filename from UTF-8 into Unicode + */ + + if ((descp->cd_flags & CD_DECOMPOSED) == 0) + utf8_flags |= UTF_DECOMPOSED; + result = utf8_decodestr(descp->cd_nameptr, descp->cd_namelen, + key->nodeName.unicode, &unicodeBytes, + sizeof(key->nodeName.unicode), ':', utf8_flags); + key->nodeName.length = unicodeBytes / sizeof(UniChar); + key->keyLength = kHFSPlusCatalogKeyMinimumLength + unicodeBytes; + if (result) { + if (result != ENAMETOOLONG) + result = EINVAL; /* name has invalid characters */ + return (result); + } + +#if CONFIG_HFS_STD + /* + * For HFS volumes convert to an HFS compatible key + * + * XXX need to save the encoding that succeeded + */ + if (std_hfs) { + HFSCatalogKey hfskey; + + bzero(&hfskey, sizeof(hfskey)); + hfskey.keyLength = kHFSCatalogKeyMinimumLength; + hfskey.parentID = key->parentID; + hfskey.nodeName[0] = 0; + if (key->nodeName.length > 0) { + int res; + if ((res = unicode_to_hfs(HFSTOVCB(hfsmp), + key->nodeName.length * 2, + key->nodeName.unicode, + &hfskey.nodeName[0], retry)) != 0) { + if (res != ENAMETOOLONG) + res = EINVAL; + + return res; + } + hfskey.keyLength += hfskey.nodeName[0]; + } + bcopy(&hfskey, key, sizeof(hfskey)); + } +#endif + + return (0); + } + + +/* + * Resolve hard link reference to obtain the inode record. + */ +int +cat_resolvelink(struct hfsmount *hfsmp, u_int32_t linkref, int isdirlink, struct HFSPlusCatalogFile *recp) +{ + FSBufferDescriptor btdata; + struct BTreeIterator *iterator; + struct cat_desc idesc; + char inodename[32]; + cnid_t parentcnid; + int result = 0; + + BDINIT(btdata, recp); + + if (isdirlink) { + MAKE_DIRINODE_NAME(inodename, sizeof(inodename), (unsigned int)linkref); + parentcnid = hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid; + } else { + MAKE_INODE_NAME(inodename, sizeof(inodename), (unsigned int)linkref); + parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + } + + /* Get space for iterator */ + iterator = hfs_mallocz(sizeof(*iterator)); + + /* Build a descriptor for private dir. */ + idesc.cd_parentcnid = parentcnid; + idesc.cd_nameptr = (const u_int8_t *)inodename; + idesc.cd_namelen = strlen(inodename); + idesc.cd_flags = 0; + idesc.cd_hint = 0; + idesc.cd_encoding = 0; + (void) buildkey(hfsmp, &idesc, (HFSPlusCatalogKey *)&iterator->key, 0); + + result = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator, + &btdata, NULL, NULL); + + if (result == 0) { + /* Make sure there's a reference */ + if (recp->hl_linkCount == 0) + recp->hl_linkCount = 2; + } else { + printf("hfs: cat_resolvelink: can't find inode=%s on vol=%s\n", inodename, hfsmp->vcbVN); + } + + hfs_free(iterator, sizeof(*iterator)); + + return (result ? ENOENT : 0); +} + +/* + * Resolve hard link reference to obtain the inode number. + */ +static int +resolvelinkid(struct hfsmount *hfsmp, u_int32_t linkref, ino_t *ino) +{ + struct HFSPlusCatalogFile record; + int error; + + /* + * Since we know resolvelinkid is only called from + * cat_getdirentries, we can assume that only file + * hardlinks need to be resolved (cat_getdirentries + * can resolve directory hardlinks in place). 
+ */ + error = cat_resolvelink(hfsmp, linkref, 0, &record); + if (error == 0) { + if (record.fileID == 0) + error = ENOENT; + else + *ino = record.fileID; + } + return (error); +} + +/* + * getkey - get a key from id by doing a thread lookup + */ +static int +getkey(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key) +{ + struct BTreeIterator * iterator; + FSBufferDescriptor btdata; + u_int16_t datasize; + CatalogKey * keyp; + CatalogRecord * recp; + int result; + int std_hfs; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + + iterator = hfs_mallocz(sizeof(*iterator)); + buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + result = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator, + &btdata, &datasize, iterator); + if (result) + goto exit; + + /* Turn thread record into a cnode key (in place) */ + switch (recp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + bcopy(keyp, key, keyp->hfs.keyLength + 1); + break; +#endif + + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + bcopy(keyp, key, keyp->hfsPlus.keyLength + 2); + break; + + default: + result = ENOENT; + break; + } + +exit: + hfs_free(iterator, sizeof(*iterator)); + hfs_free(recp, sizeof(*recp)); + + return MacToVFSError(result); +} + +/* + * getkeyplusattr - From id, fetch the key and the bsd attrs for a file/dir (could pass + * null arguments to cat_idlookup instead, but we save around 10% by not building the + * cat_desc here). Both key and attrp must point to real structures. + * + * The key's parent id is the only part of the key expected to be used by the caller. + * The name portion of the key may not always be valid (ie in the case of a hard link). + */ +int +cat_getkeyplusattr(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key, struct cat_attr *attrp) +{ + int result; + + result = getkey(hfsmp, cnid, key); + + if (result == 0) { + result = cat_lookupbykey(hfsmp, key, 0, 0, 0, NULL, attrp, NULL, NULL); + } + /* + * Check for a raw file hardlink inode. + * Fix up the parent id in the key if necessary. + * Only hard links created by Mac OS X 10.5 or later can be resolved here. + */ + if ((result == 0) && + (key->hfsPlus.parentID == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (attrp->ca_recflags & kHFSHasLinkChainMask)) { + cnid_t nextlinkid = 0; + cnid_t prevlinkid = 0; + struct cat_desc linkdesc; + + /* + * Pick up the first link in the chain and get a descriptor for it. + * This allows blind bulk access checks to work for hardlinks. 
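+	 * The first link's parent then becomes the parent ID reported in the
+	 * key, which is the only part of the key the caller is expected to use.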
+ */ + if ((cat_lookup_siblinglinks(hfsmp, cnid, &prevlinkid, &nextlinkid) == 0) && + (nextlinkid != 0)) { + if (cat_findname(hfsmp, nextlinkid, &linkdesc) == 0) { + key->hfsPlus.parentID = linkdesc.cd_parentcnid; + cat_releasedesc(&linkdesc); + } + } + } + return MacToVFSError(result); +} + + +/* + * buildrecord - build a default catalog directory or file record + */ +static void +buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding, + CatalogRecord *crp, u_int32_t *recordSize) +{ + int type = attrp->ca_mode & S_IFMT; + u_int32_t createtime = to_hfs_time(attrp->ca_itime); + + if (std_hfs == 0) { + struct HFSPlusBSDInfo * bsdp = NULL; + + if (type == S_IFDIR) { + crp->recordType = kHFSPlusFolderRecord; + crp->hfsPlusFolder.flags = attrp->ca_recflags; + crp->hfsPlusFolder.valence = 0; + crp->hfsPlusFolder.folderID = cnid; + crp->hfsPlusFolder.createDate = createtime; + crp->hfsPlusFolder.contentModDate = createtime; + crp->hfsPlusFolder.attributeModDate = createtime; + crp->hfsPlusFolder.accessDate = createtime; + crp->hfsPlusFolder.backupDate = 0; + crp->hfsPlusFolder.textEncoding = encoding; + crp->hfsPlusFolder.folderCount = 0; + bcopy(attrp->ca_finderinfo, &crp->hfsPlusFolder.userInfo, 32); + bsdp = &crp->hfsPlusFolder.bsdInfo; + bsdp->special.linkCount = 1; + *recordSize = sizeof(HFSPlusCatalogFolder); + } else { + crp->recordType = kHFSPlusFileRecord; + crp->hfsPlusFile.flags = attrp->ca_recflags; + crp->hfsPlusFile.reserved1 = 0; + crp->hfsPlusFile.fileID = cnid; + crp->hfsPlusFile.createDate = createtime; + crp->hfsPlusFile.contentModDate = createtime; + crp->hfsPlusFile.accessDate = createtime; + crp->hfsPlusFile.attributeModDate = createtime; + crp->hfsPlusFile.backupDate = 0; + crp->hfsPlusFile.textEncoding = encoding; + crp->hfsPlusFile.reserved2 = 0; + bcopy(attrp->ca_finderinfo, &crp->hfsPlusFile.userInfo, 32); + bsdp = &crp->hfsPlusFile.bsdInfo; + /* BLK/CHR need to save the device info */ + if (type == S_IFBLK || type == S_IFCHR) { + bsdp->special.rawDevice = attrp->ca_rdev; + } else { + bsdp->special.linkCount = 1; + } + bzero(&crp->hfsPlusFile.dataFork, 2*sizeof(HFSPlusForkData)); + *recordSize = sizeof(HFSPlusCatalogFile); + } + bsdp->ownerID = attrp->ca_uid; + bsdp->groupID = attrp->ca_gid; + bsdp->fileMode = attrp->ca_mode; + bsdp->adminFlags = attrp->ca_flags >> 16; + bsdp->ownerFlags = attrp->ca_flags & 0x000000FF; + } +#if CONFIG_HFS_STD + else { + createtime = UTCToLocal(createtime); + if (type == S_IFDIR) { + bzero(crp, sizeof(HFSCatalogFolder)); + crp->recordType = kHFSFolderRecord; + crp->hfsFolder.folderID = cnid; + crp->hfsFolder.createDate = createtime; + crp->hfsFolder.modifyDate = createtime; + bcopy(attrp->ca_finderinfo, &crp->hfsFolder.userInfo, 32); + *recordSize = sizeof(HFSCatalogFolder); + } else { + bzero(crp, sizeof(HFSCatalogFile)); + crp->recordType = kHFSFileRecord; + crp->hfsFile.fileID = cnid; + crp->hfsFile.createDate = createtime; + crp->hfsFile.modifyDate = createtime; + bcopy(attrp->ca_finderinfo, &crp->hfsFile.userInfo, 16); + bcopy(&attrp->ca_finderinfo[16], &crp->hfsFile.finderInfo, 16); + *recordSize = sizeof(HFSCatalogFile); + } + } +#endif + +} + + +/* + * builddesc - build a cnode descriptor from an HFS+ key + */ +static int +builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_int32_t hint, u_int32_t encoding, + int isdir, struct cat_desc *descp) +{ + int result = 0; + unsigned char * nameptr; + size_t bufsize; + size_t utf8len; + unsigned char tmpbuff[128]; + + /* guess a size... 
*/ + bufsize = (3 * key->nodeName.length) + 1; + if (bufsize >= sizeof(tmpbuff) - 1) { + nameptr = hfs_malloc(bufsize); + } else { + nameptr = &tmpbuff[0]; + } + + result = utf8_encodestr(key->nodeName.unicode, + key->nodeName.length * sizeof(UniChar), + nameptr, (size_t *)&utf8len, + bufsize, ':', 0); + + if (result == ENAMETOOLONG) { + if (nameptr != &tmpbuff[0]) + hfs_free(nameptr, bufsize); + bufsize = 1 + utf8_encodelen(key->nodeName.unicode, + key->nodeName.length * sizeof(UniChar), + ':', 0); + nameptr = hfs_malloc(bufsize); + + result = utf8_encodestr(key->nodeName.unicode, + key->nodeName.length * sizeof(UniChar), + nameptr, (size_t *)&utf8len, + bufsize, ':', 0); + } + descp->cd_parentcnid = key->parentID; + descp->cd_nameptr = (const u_int8_t *)vfs_addname((char *)nameptr, utf8len, 0, 0); + descp->cd_namelen = utf8len; + descp->cd_cnid = cnid; + descp->cd_hint = hint; + descp->cd_flags = CD_DECOMPOSED | CD_HASBUF; + if (isdir) + descp->cd_flags |= CD_ISDIR; + descp->cd_encoding = encoding; + if (nameptr != &tmpbuff[0]) { + hfs_free(nameptr, bufsize); + } + return result; +} + + +/* + * getbsdattr - get attributes in bsd format + * + */ +static void +getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct cat_attr * attrp) +{ + int isDirectory = (crp->recordType == kHFSPlusFolderRecord); + const struct HFSPlusBSDInfo *bsd = &crp->bsdInfo; + + attrp->ca_recflags = crp->flags; + attrp->ca_atime = to_bsd_time(crp->accessDate); + attrp->ca_atimeondisk = attrp->ca_atime; + attrp->ca_mtime = to_bsd_time(crp->contentModDate); + attrp->ca_ctime = to_bsd_time(crp->attributeModDate); + attrp->ca_itime = to_bsd_time(crp->createDate); + attrp->ca_btime = to_bsd_time(crp->backupDate); + + if ((bsd->fileMode & S_IFMT) == 0) { + attrp->ca_flags = 0; + attrp->ca_uid = hfsmp->hfs_uid; + attrp->ca_gid = hfsmp->hfs_gid; + if (isDirectory) { + attrp->ca_mode = S_IFDIR | (hfsmp->hfs_dir_mask & ACCESSPERMS); + } else { + attrp->ca_mode = S_IFREG | (hfsmp->hfs_file_mask & ACCESSPERMS); + } + attrp->ca_linkcount = 1; + attrp->ca_rdev = 0; + } else { + attrp->ca_linkcount = 1; /* may be overridden below */ + attrp->ca_rdev = 0; + attrp->ca_uid = bsd->ownerID; + attrp->ca_gid = bsd->groupID; + attrp->ca_flags = bsd->ownerFlags | (bsd->adminFlags << 16); + attrp->ca_mode = (mode_t)bsd->fileMode; + switch (attrp->ca_mode & S_IFMT) { + case S_IFCHR: /* fall through */ + case S_IFBLK: + attrp->ca_rdev = bsd->special.rawDevice; + break; + case S_IFIFO: + case S_IFSOCK: + case S_IFDIR: + case S_IFREG: + /* Pick up the hard link count */ + if (bsd->special.linkCount > 0) + attrp->ca_linkcount = bsd->special.linkCount; + break; + } + + /* + * Override the permissions as determined by the mount auguments + * in ALMOST the same way unset permissions are treated but keep + * track of whether or not the file or folder is hfs locked + * by leaving the h_pflags field unchanged from what was unpacked + * out of the catalog. + */ + /* + * This code was used to do UID translation with MNT_IGNORE_OWNERS + * (aka MNT_UNKNOWNPERMISSIONS) at the HFS layer. It's largely done + * at the VFS layer, so there is no need to do it here now; this also + * allows VFS to let root see the real UIDs. 
+ * + * if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) { + * attrp->ca_uid = hfsmp->hfs_uid; + * attrp->ca_gid = hfsmp->hfs_gid; + * } + */ + } + + if (isDirectory) { + if (!S_ISDIR(attrp->ca_mode)) { + attrp->ca_mode &= ~S_IFMT; + attrp->ca_mode |= S_IFDIR; + } + attrp->ca_entries = ((const HFSPlusCatalogFolder *)crp)->valence; + attrp->ca_dircount = ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && (attrp->ca_recflags & kHFSHasFolderCountMask)) ? + ((const HFSPlusCatalogFolder *)crp)->folderCount : 0; + + /* Keep UF_HIDDEN bit in sync with Finder Info's invisible bit */ + if (((const HFSPlusCatalogFolder *)crp)->userInfo.frFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask)) + attrp->ca_flags |= UF_HIDDEN; + } else { + /* Keep IMMUTABLE bits in sync with HFS locked flag */ + if (crp->flags & kHFSFileLockedMask) { + /* The file's supposed to be locked: + Make sure at least one of the IMMUTABLE bits is set: */ + if ((attrp->ca_flags & (SF_IMMUTABLE | UF_IMMUTABLE)) == 0) + attrp->ca_flags |= UF_IMMUTABLE; + } else { + /* The file's supposed to be unlocked: */ + attrp->ca_flags &= ~(SF_IMMUTABLE | UF_IMMUTABLE); + } + /* Keep UF_HIDDEN bit in sync with Finder Info's invisible bit */ + if (crp->userInfo.fdFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask)) + attrp->ca_flags |= UF_HIDDEN; + /* get total blocks (both forks) */ + attrp->ca_blocks = crp->dataFork.totalBlocks + crp->resourceFork.totalBlocks; + + /* On HFS+ the ThreadExists flag must always be set. */ + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) + attrp->ca_recflags |= kHFSThreadExistsMask; + + /* Pick up the hardlink first link, if any. */ + attrp->ca_firstlink = (attrp->ca_recflags & kHFSHasLinkChainMask) ? crp->hl_firstLinkID : 0; + } + + attrp->ca_fileid = crp->fileID; + + bcopy(&crp->userInfo, attrp->ca_finderinfo, 32); +} + +#if CONFIG_HFS_STD +/* + * promotekey - promote hfs key to hfs plus key + * + */ +static void +promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, + HFSPlusCatalogKey *keyp, u_int32_t *encoding) +{ + hfs_to_unicode_func_t hfs_get_unicode = hfsmp->hfs_get_unicode; + u_int32_t uniCount; + int error; + + *encoding = hfsmp->hfs_encoding; + + error = hfs_get_unicode(hfskey->nodeName, keyp->nodeName.unicode, + kHFSPlusMaxFileNameChars, &uniCount); + /* + * When an HFS name cannot be encoded with the current + * encoding use MacRoman as a fallback. 
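+	 * (The returned encoding is forced to 0, i.e. MacRoman, so callers
+	 * such as builddesc record the table that actually produced the name.)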
+ */ + if (error && hfsmp->hfs_encoding != kTextEncodingMacRoman) { + *encoding = 0; + (void) mac_roman_to_unicode(hfskey->nodeName, + keyp->nodeName.unicode, + kHFSPlusMaxFileNameChars, + &uniCount); + } + + keyp->nodeName.length = uniCount; + keyp->parentID = hfskey->parentID; +} + +/* + * promotefork - promote hfs fork info to hfs plus + * + */ +static void +promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *filep, + int resource, struct cat_fork * forkp) +{ + struct HFSPlusExtentDescriptor *xp; + u_int32_t blocksize = HFSTOVCB(hfsmp)->blockSize; + + bzero(forkp, sizeof(*forkp)); + xp = &forkp->cf_extents[0]; + if (resource) { + forkp->cf_size = filep->rsrcLogicalSize; + forkp->cf_blocks = filep->rsrcPhysicalSize / blocksize; + forkp->cf_bytesread = 0; + forkp->cf_vblocks = 0; + xp[0].startBlock = (u_int32_t)filep->rsrcExtents[0].startBlock; + xp[0].blockCount = (u_int32_t)filep->rsrcExtents[0].blockCount; + xp[1].startBlock = (u_int32_t)filep->rsrcExtents[1].startBlock; + xp[1].blockCount = (u_int32_t)filep->rsrcExtents[1].blockCount; + xp[2].startBlock = (u_int32_t)filep->rsrcExtents[2].startBlock; + xp[2].blockCount = (u_int32_t)filep->rsrcExtents[2].blockCount; + } else { + forkp->cf_size = filep->dataLogicalSize; + forkp->cf_blocks = filep->dataPhysicalSize / blocksize; + forkp->cf_bytesread = 0; + forkp->cf_vblocks = 0; + xp[0].startBlock = (u_int32_t)filep->dataExtents[0].startBlock; + xp[0].blockCount = (u_int32_t)filep->dataExtents[0].blockCount; + xp[1].startBlock = (u_int32_t)filep->dataExtents[1].startBlock; + xp[1].blockCount = (u_int32_t)filep->dataExtents[1].blockCount; + xp[2].startBlock = (u_int32_t)filep->dataExtents[2].startBlock; + xp[2].blockCount = (u_int32_t)filep->dataExtents[2].blockCount; + } +} + +/* + * promoteattr - promote standard hfs catalog attributes to hfs plus + * + */ +static void +promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlusCatalogFile *crp) +{ + u_int32_t blocksize = HFSTOVCB(hfsmp)->blockSize; + + if (dataPtr->recordType == kHFSFolderRecord) { + const struct HFSCatalogFolder * folder; + + folder = (const struct HFSCatalogFolder *) dataPtr; + crp->recordType = kHFSPlusFolderRecord; + crp->flags = folder->flags; + crp->fileID = folder->folderID; + crp->createDate = LocalToUTC(folder->createDate); + crp->contentModDate = LocalToUTC(folder->modifyDate); + crp->backupDate = LocalToUTC(folder->backupDate); + crp->reserved1 = folder->valence; + crp->reserved2 = 0; + bcopy(&folder->userInfo, &crp->userInfo, 32); + } else /* file */ { + const struct HFSCatalogFile * file; + + file = (const struct HFSCatalogFile *) dataPtr; + crp->recordType = kHFSPlusFileRecord; + crp->flags = file->flags; + crp->fileID = file->fileID; + crp->createDate = LocalToUTC(file->createDate); + crp->contentModDate = LocalToUTC(file->modifyDate); + crp->backupDate = LocalToUTC(file->backupDate); + crp->reserved1 = 0; + crp->reserved2 = 0; + bcopy(&file->userInfo, &crp->userInfo, 16); + bcopy(&file->finderInfo, &crp->finderInfo, 16); + crp->dataFork.totalBlocks = file->dataPhysicalSize / blocksize; + crp->resourceFork.totalBlocks = file->rsrcPhysicalSize / blocksize; + } + crp->textEncoding = 0; + crp->attributeModDate = crp->contentModDate; + crp->accessDate = crp->contentModDate; + bzero(&crp->bsdInfo, sizeof(HFSPlusBSDInfo)); +} +#endif + +/* + * Build a catalog node thread record from a catalog key + * and return the size of the record. 
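+ * HFS Plus thread records are variable sized: only the Unicode units
+ * actually used by the name are counted, so an 8-character name yields
+ * a record (255 - 8) * sizeof(UniChar) bytes smaller than the full
+ * HFSPlusCatalogThread structure.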
+ */ +static int +buildthread(void *keyp, void *recp, int std_hfs, int directory) +{ + int size = 0; + + if (std_hfs == 0) { + HFSPlusCatalogKey *key = (HFSPlusCatalogKey *)keyp; + HFSPlusCatalogThread *rec = (HFSPlusCatalogThread *)recp; + + size = sizeof(HFSPlusCatalogThread); + if (directory) + rec->recordType = kHFSPlusFolderThreadRecord; + else + rec->recordType = kHFSPlusFileThreadRecord; + rec->reserved = 0; + rec->parentID = key->parentID; + bcopy(&key->nodeName, &rec->nodeName, + sizeof(UniChar) * (key->nodeName.length + 1)); + + /* HFS Plus has variable sized thread records */ + size -= (sizeof(rec->nodeName.unicode) - + (rec->nodeName.length * sizeof(UniChar))); + + } +#if CONFIG_HFS_STD + else { + HFSCatalogKey *key = (HFSCatalogKey *)keyp; + HFSCatalogThread *rec = (HFSCatalogThread *)recp; + + size = sizeof(HFSCatalogThread); + bzero(rec, size); + if (directory) + rec->recordType = kHFSFolderThreadRecord; + else + rec->recordType = kHFSFileThreadRecord; + rec->parentID = key->parentID; + bcopy(key->nodeName, rec->nodeName, key->nodeName[0]+1); + + } +#endif + + return (size); +} + +/* + * Build a catalog node thread key. + */ +static void +buildthreadkey(HFSCatalogNodeID parentID, int std_hfs, CatalogKey *key) +{ + if (std_hfs == 0) { + key->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength; + key->hfsPlus.parentID = parentID; + key->hfsPlus.nodeName.length = 0; + } +#if CONFIG_HFS_STD + else { + key->hfs.keyLength = kHFSCatalogKeyMinimumLength; + key->hfs.reserved = 0; + key->hfs.parentID = parentID; + key->hfs.nodeName[0] = 0; + } +#endif + +} + +/* + * Extract the text encoding from a catalog node record. + */ +static u_int32_t +getencoding(const CatalogRecord *crp) +{ + u_int32_t encoding; + + if (crp->recordType == kHFSPlusFolderRecord) + encoding = crp->hfsPlusFolder.textEncoding; + else if (crp->recordType == kHFSPlusFileRecord) + encoding = crp->hfsPlusFile.textEncoding; + else + encoding = 0; + + return (encoding); +} + +/* + * Extract the CNID from a catalog node record. + */ +static cnid_t +getcnid(const CatalogRecord *crp) +{ + cnid_t cnid = 0; + + switch (crp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFolderRecord: + cnid = crp->hfsFolder.folderID; + break; + case kHFSFileRecord: + cnid = crp->hfsFile.fileID; + break; +#endif + + case kHFSPlusFolderRecord: + cnid = crp->hfsPlusFolder.folderID; + break; + case kHFSPlusFileRecord: + cnid = crp->hfsPlusFile.fileID; + break; + default: + printf("hfs: getcnid: unknown recordType=%d\n", crp->recordType); + break; + } + + return (cnid); +} + +/* + * Extract the parent ID from a catalog node record. + */ +static cnid_t +getparentcnid(const CatalogRecord *recp) +{ + cnid_t cnid = 0; + + switch (recp->recordType) { + +#if CONFIG_HFS_STD + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + cnid = recp->hfsThread.parentID; + break; +#endif + + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + cnid = recp->hfsPlusThread.parentID; + break; + default: + panic("hfs: getparentcnid: unknown recordType (crp @ %p)\n", recp); + break; + } + + return (cnid); +} + +/* + * Determine if a catalog node record is a directory. + */ +static int +isadir(const CatalogRecord *crp) +{ + if (crp->recordType == kHFSPlusFolderRecord) { + return 1; + } +#if CONFIG_HFS_STD + if (crp->recordType == kHFSFolderRecord) { + return 1; + } +#endif + + return 0; +} + +/* + * cat_lookup_dirlink - lookup a catalog record for directory hard link + * (not inode) using catalog record id. 
Note that this function does + * NOT resolve directory hard link to its directory inode and return + * the link record. + * + * Note: The caller is responsible for releasing the output catalog + * descriptor (when supplied outdescp is non-null). + */ +int +cat_lookup_dirlink(struct hfsmount *hfsmp, cnid_t dirlink_id, + u_int8_t forktype, struct cat_desc *outdescp, + struct cat_attr *attrp, struct cat_fork *forkp) +{ + struct BTreeIterator *iterator = NULL; + FSBufferDescriptor btdata; + u_int16_t datasize; + CatalogKey *keyp; + CatalogRecord *recp = NULL; + int error; + + /* No directory hard links on standard HFS */ + if (hfsmp->vcbSigWord == kHFSSigWord) { + return ENOTSUP; + } + + iterator = hfs_mallocz(sizeof(*iterator)); + buildthreadkey(dirlink_id, 1, (CatalogKey *)&iterator->key); + + recp = hfs_malloc(sizeof(CatalogRecord)); + BDINIT(btdata, recp); + + error = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator, + &btdata, &datasize, iterator); + if (error) { + goto out; + } + /* Directory hard links are catalog file record */ + if (recp->recordType != kHFSPlusFileThreadRecord) { + error = ENOENT; + goto out; + } + + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + if (forktype == kHFSResourceForkType) { + /* Lookup resource fork for directory hard link */ + error = cat_lookupbykey(hfsmp, keyp, HFS_LOOKUP_HARDLINK, 0, true, outdescp, attrp, forkp, NULL); + } else { + /* Lookup data fork, if any, for directory hard link */ + error = cat_lookupbykey(hfsmp, keyp, HFS_LOOKUP_HARDLINK, 0, false, outdescp, attrp, forkp, NULL); + } + if (error) { + printf ("hfs: cat_lookup_dirlink(): Error looking up file record for id=%u (error=%d)\n", dirlink_id, error); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + goto out; + } + /* Just for sanity, make sure that id in catalog record and thread record match */ + if ((outdescp != NULL) && (dirlink_id != outdescp->cd_cnid)) { + printf ("hfs: cat_lookup_dirlink(): Requested cnid=%u != found_cnid=%u\n", dirlink_id, outdescp->cd_cnid); + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + error = ENOENT; + } + +out: + if (recp) { + hfs_free(recp, sizeof(*recp)); + } + hfs_free(iterator, sizeof(*iterator)); + + return MacToVFSError(error); +} + +/* + * cnode_update_dirlink - update the catalog node for directory hard link + * described by descp using the data from attrp and forkp. + */ +int +cat_update_dirlink(struct hfsmount *hfsmp, u_int8_t forktype, + struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp) +{ + if (forktype == kHFSResourceForkType) { + return cat_update_internal(hfsmp, true, descp, attrp, NULL, forkp); + } else { + return cat_update_internal(hfsmp, true, descp, attrp, forkp, NULL); + } +} + +void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src, + HFSPlusExtentDescriptor *extents) +{ + /* Copy everything but the extents into the dest fork */ + memcpy(dst, src, offsetof(struct cat_fork, cf_extents)); + /* Then copy the supplied extents into the fork */ + memcpy(dst->cf_extents, extents, sizeof(HFSPlusExtentRecord)); +} diff --git a/core/hfs_catalog.h b/core/hfs_catalog.h new file mode 100644 index 0000000..0227dc2 --- /dev/null +++ b/core/hfs_catalog.h @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef __HFS_CATALOG__ +#define __HFS_CATALOG__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include + +#include "hfs_format.h" + +/* HFS Catalog */ + + +/* + * Catalog ADTs + * + * The cat_desc, cat_attr, and cat_fork structures are + * use to import/export data to/from the Catalog file. + * The fields in these structures are always in BSD + * runtime format (e.g. dates and names). + */ + +typedef u_int32_t cnid_t; + +/* + * Catalog Node Descriptor (runtime) + */ +struct cat_desc { + u_int8_t cd_flags; /* see below (8 bits) */ + u_int8_t cd_encoding; /* name encoding */ + int16_t cd_namelen; /* length of cnode name */ + cnid_t cd_parentcnid; /* parent directory CNID */ + u_int32_t cd_hint; /* catalog file hint */ + cnid_t cd_cnid; /* cnode id (for getattrlist) */ + const u_int8_t * cd_nameptr; /* pointer to cnode name */ +}; + +/* cd_flags + * + * CD_EOF is used by hfs_vnop_readdir / cat_getdirentries to indicate EOF was + * encountered during a directory enumeration. When this flag is observed + * on the next call to hfs_vnop_readdir it tells the caller that there's no + * need to descend into the catalog as EOF was encountered during the last call. + * This flag should only be set on the descriptor embedded in the directoryhint. 
+ */ + +#define CD_HASBUF 0x01 /* allocated filename buffer */ +#define CD_DECOMPOSED 0x02 /* name is fully decomposed */ +#define CD_EOF 0x04 /* see above */ +#define CD_ISMETA 0x40 /* describes a metadata file */ +#define CD_ISDIR 0x80 /* describes a directory */ + +/* + * Catalog Node Attributes (runtime) + */ +struct cat_attr { + cnid_t ca_fileid; /* inode number (for stat) normally == cnid */ + mode_t ca_mode; /* file access mode and type (16 bits) */ + u_int16_t ca_recflags; /* catalog record flags (16 bit integer) */ + u_int32_t ca_linkcount; /* real hard link count */ + uid_t ca_uid; /* file owner */ + gid_t ca_gid; /* file group */ + union { + dev_t cau_rdev; /* special file device (VBLK or VCHAR only) */ + u_int32_t cau_linkref; /* hardlink reference number */ + } ca_union1; + time_t ca_atime; /* last access time */ + time_t ca_atimeondisk; /* access time value on disk */ + time_t ca_mtime; /* last data modification time */ + time_t ca_ctime; /* last file status change */ + time_t ca_itime; /* file initialization time */ + time_t ca_btime; /* last backup time */ + u_int32_t ca_flags; /* status flags (chflags) */ + union { + u_int32_t cau_blocks; /* total file blocks used (rsrc + data) */ + u_int32_t cau_entries; /* total directory entries (valence) */ + } ca_union2; + union { + u_int32_t cau_dircount; /* count of sub dirs (for posix nlink) */ + u_int32_t cau_firstlink; /* first hardlink link (files only) */ + } ca_union3; + union { + u_int8_t ca_finderinfo[32]; /* Opaque Finder information */ + struct { + FndrFileInfo ca_finderfileinfo; + struct FndrExtendedFileInfo ca_finderextendedfileinfo; + }; + struct { + FndrDirInfo ca_finderdirinfo; + struct FndrExtendedDirInfo ca_finderextendeddirinfo; + }; + }; +}; + +/* Aliases for common fields */ +#define ca_rdev ca_union1.cau_rdev +#define ca_linkref ca_union1.cau_linkref +#define ca_blocks ca_union2.cau_blocks +#define ca_entries ca_union2.cau_entries +#define ca_dircount ca_union3.cau_dircount +#define ca_firstlink ca_union3.cau_firstlink + +/* + * Catalog Node Fork (runtime) + * + * NOTE: this is not the same as a struct HFSPlusForkData + * + * NOTE: if cf_new_size > cf_size, then a write is in progress and is extending + * the EOF; the new EOF will be cf_new_size. Writes and pageouts may validly + * write up to cf_new_size, but reads should only read up to cf_size. When + * an extending write is not in progress, cf_new_size is zero. + */ +struct cat_fork { + off_t cf_size; /* fork's logical size in bytes */ + off_t cf_new_size; /* fork's logical size after write completes */ + union { + u_int32_t cfu_clump; /* fork's clump size in bytes (sys files only) */ + u_int64_t cfu_bytesread; /* bytes read from this fork */ + } cf_union; + u_int32_t cf_vblocks; /* virtual (unalloated) blocks */ + u_int32_t cf_blocks; /* total blocks used by this fork */ + struct HFSPlusExtentDescriptor cf_extents[8]; /* initial set of extents */ + + /* + * NOTE: If you change this structure, make sure you change you change + * hfs_fork_copy. + */ +}; + +#define cf_clump cf_union.cfu_clump +#define cf_bytesread cf_union.cfu_bytesread + +void hfs_fork_copy(struct cat_fork *dst, const struct cat_fork *src, + HFSPlusExtentDescriptor *extents); + +/* + * Directory Hint + * Used to hold state across directory enumerations. 
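+ *
+ * A hint remembers the index of the last entry returned, the B-tree
+ * node of the directory's thread record, and a descriptor for that
+ * entry, so the next call can resume the scan where the previous one
+ * stopped instead of re-walking the directory from the start.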
+ * + */ +struct directoryhint { + TAILQ_ENTRY(directoryhint) dh_link; /* chain */ + int dh_index; /* index into directory (zero relative) */ + u_int32_t dh_threadhint; /* node hint of a directory's thread record */ + u_int32_t dh_time; + struct cat_desc dh_desc; /* entry's descriptor */ +}; +typedef struct directoryhint directoryhint_t; + +/* + * HFS_MAXDIRHINTS cannot be larger than 63 without reducing + * HFS_INDEX_BITS, because given the 6-bit tag, at most 63 different + * tags can exist. When HFS_MAXDIRHINTS is larger than 63, the same + * list may contain dirhints of the same tag, and a staled dirhint may + * be returned. + */ +#define HFS_MAXDIRHINTS 32 +#define HFS_DIRHINT_TTL 45 + +#define HFS_INDEX_MASK 0x03ffffff +#define HFS_INDEX_BITS 26 + + +/* + * Catalog Node Entry + * + * A cat_entry is used for bulk enumerations (hfs_readdirattr). + */ +struct cat_entry { + struct cat_desc ce_desc; + struct cat_attr ce_attr; + off_t ce_datasize; + off_t ce_rsrcsize; + u_int32_t ce_datablks; + u_int32_t ce_rsrcblks; +}; + +/* + * Starting in 10.5, hfs_vnop_readdirattr() only makes one + * call to cat_getentriesattr(). So we increased MAXCATENTRIES + * while keeping the total size of the CE LIST buffer <= 8K + * (which works out to be 60 entries per call). The 8K limit + * keeps the memory coming from a kalloc zone instead of + * valuable/fragment-able kernel map space. + */ +#define MAXCATENTRIES \ + (1 + (8192 - sizeof (struct cat_entrylist)) / sizeof (struct cat_entry)) + +/* + * Catalog Node Entry List + * + * A cat_entrylist is a list of Catalog Node Entries. + */ +struct cat_entrylist { + u_int32_t maxentries; /* number of entries requested */ + u_int32_t realentries; /* number of valid entries returned */ + u_int32_t skipentries; /* number of entries skipped (reserved HFS+ files) */ + struct cat_entry entry[1]; /* array of entries */ +}; + +#define CE_LIST_SIZE(entries) \ + sizeof (*ce_list) + (((entries) - 1) * sizeof (struct cat_entry)) + +struct hfsmount; + +/* + * Catalog FileID/CNID Acquisition / Lookup + * + * Some use-cases require that we find a valid CNID + * before we may be ready to enter the item into the namespace. + * In order to resolve this, we support a hashtable attached to + * the mount that is secured by the catalog lock. + * + * Finding the next valid CNID is easy if the wraparound bit is + * not set -- you just pull from the hfsmp next pointer. + * If it is set then you must find a free entry in the catalog + * and also query the hashtable to see if the item is free or not. + * + * If you want to request a CNID before there is a backing item + * in the catalog, you must find one that is valid, then insert + * it into the hash table until such time that the item is + * inserted into the catalog. After successful catalog insertion, + * you must remove the item from the hashtable. 
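+ *
+ * Restating that rule as a sketch using the helpers declared below
+ * (error handling and locking omitted; 'chosen_cnid' stands for whatever
+ * valid id the caller settled on):
+ *
+ *    cat_preflightid_t pre = { .fileid = chosen_cnid };
+ *
+ *    error = cat_insert_idhash(hfsmp, &pre);   // reserve the id
+ *    ... insert the backing item into the catalog ...
+ *    error = cat_remove_idhash(&pre);          // catalog record owns it now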
+ */ + +typedef struct cat_preflightid { + cnid_t fileid; + LIST_ENTRY(cat_preflightid) id_hash; +} cat_preflightid_t; + +extern int cat_remove_idhash (cat_preflightid_t *preflight); +extern int cat_insert_idhash (struct hfsmount *hfsmp, cat_preflightid_t *preflight); +extern int cat_check_idhash (struct hfsmount *hfsmp, cnid_t test_fileid); + +/* initialize the id look up hashtable during mount */ +extern void hfs_idhash_init (struct hfsmount *hfsmp); + +/* release the id lookup hashtable during unmount */ +extern void hfs_idhash_destroy (struct hfsmount *hfsmp); + +/* Get a new CNID for use */ +extern int cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid); + + +/* default size of ID hash is 64 entries */ +#define HFS_IDHASH_DEFAULT 64 + + +/* + * Catalog Operations Hint + * + * lower 16 bits: count of B-tree insert operations + * upper 16 bits: count of B-tree delete operations + * + */ +#define CAT_DELETE 0x00010000 +#define CAT_CREATE 0x00000002 +#define CAT_RENAME 0x00010002 +#define CAT_EXCHANGE 0x00010002 + +typedef u_int32_t catops_t; + +/* + * The size of cat_cookie_t much match the size of + * the nreserve struct (in BTreeNodeReserve.c). + */ +typedef struct cat_cookie_t { +#if defined(__LP64__) + char opaque[40]; +#else + char opaque[24]; +#endif +} cat_cookie_t; + +/* Universal catalog key */ +union CatalogKey { + HFSCatalogKey hfs; + HFSPlusCatalogKey hfsPlus; +}; +typedef union CatalogKey CatalogKey; + +/* Universal catalog data record */ +union CatalogRecord { + int16_t recordType; + HFSCatalogFolder hfsFolder; + HFSCatalogFile hfsFile; + HFSCatalogThread hfsThread; + HFSPlusCatalogFolder hfsPlusFolder; + HFSPlusCatalogFile hfsPlusFile; + HFSPlusCatalogThread hfsPlusThread; +}; +typedef union CatalogRecord CatalogRecord; + +/* Constants for HFS fork types */ +enum { + kHFSDataForkType = 0x0, /* data fork */ + kHFSResourceForkType = 0xff /* resource fork */ +}; + +/* + * Catalog Interface + * + * These functions perform a catalog transactions. The + * catalog b-tree is abstracted through this interface. 
+ * (please don't go around it) + */ + + +extern void cat_releasedesc(struct cat_desc *descp); + +extern int cat_create ( struct hfsmount *hfsmp, + cnid_t new_fileid, + struct cat_desc *descp, + struct cat_attr *attrp, + struct cat_desc *out_descp); + +extern int cat_delete ( struct hfsmount *hfsmp, + struct cat_desc *descp, + struct cat_attr *attrp); + +extern int cat_lookup ( struct hfsmount *hfsmp, + struct cat_desc *descp, + int wantrsrc, + int force_casesensitive_lookup, + struct cat_desc *outdescp, + struct cat_attr *attrp, + struct cat_fork *forkp, + cnid_t *desc_cnid); + +extern int cat_idlookup (struct hfsmount *hfsmp, + cnid_t cnid, + int allow_system_files, + int wantrsrc, + struct cat_desc *outdescp, + struct cat_attr *attrp, + struct cat_fork *forkp); + +extern int cat_findname (struct hfsmount *hfsmp, + cnid_t cnid, + struct cat_desc *outdescp); + +extern int cat_getentriesattr( + struct hfsmount *hfsmp, + directoryhint_t *dirhint, + struct cat_entrylist *ce_list, + int *reachedeof); + +extern int cat_rename ( struct hfsmount * hfsmp, + struct cat_desc * from_cdp, + struct cat_desc * todir_cdp, + struct cat_desc * to_cdp, + struct cat_desc * cdp); + +extern int cat_update ( struct hfsmount *hfsmp, + struct cat_desc *descp, + struct cat_attr *attrp, + const struct cat_fork *dataforkp, + const struct cat_fork *rsrcforkp); + +extern int cat_getdirentries( + struct hfsmount *hfsmp, + u_int32_t entrycnt, + directoryhint_t *dirhint, + uio_t uio, + int extended, + int * items, + int * eofflag); + +extern int cat_insertfilethread ( + struct hfsmount *hfsmp, + struct cat_desc *descp); + +extern int cat_preflight( + struct hfsmount *hfsmp, + catops_t ops, + cat_cookie_t *cookie, + struct proc *p); + +extern void cat_postflight( + struct hfsmount *hfsmp, + cat_cookie_t *cookie, + struct proc *p); + +extern int cat_binarykeycompare( + HFSPlusCatalogKey *searchKey, + HFSPlusCatalogKey *trialKey); + +extern int CompareCatalogKeys( + HFSCatalogKey *searchKey, + HFSCatalogKey *trialKey); + +extern int CompareExtendedCatalogKeys( + HFSPlusCatalogKey *searchKey, + HFSPlusCatalogKey *trialKey); + +extern void cat_convertattr( + struct hfsmount *hfsmp, + CatalogRecord * recp, + struct cat_attr *attrp, + struct cat_fork *datafp, + struct cat_fork *rsrcfp); + +extern int cat_convertkey( + struct hfsmount *hfsmp, + CatalogKey *key, + CatalogRecord * recp, + struct cat_desc *descp); + +extern int cat_getkeyplusattr( + struct hfsmount *hfsmp, + cnid_t cnid, + CatalogKey *key, + struct cat_attr *attrp); + +/* Hard link functions. 
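+ *
+ * Each hardlink keeps its place in a doubly linked chain of link file ids
+ * (see cat_update_siblinglinks / cat_lookup_siblinglinks below).  A rough
+ * sketch of walking a chain forward, assuming a next id of 0 marks the end
+ * of the chain ('first_link_id' is illustrative):
+ *
+ *    cnid_t prev, next, cur = first_link_id;
+ *
+ *    while (cur != 0) {
+ *        if (cat_lookup_siblinglinks(hfsmp, cur, &prev, &next) != 0)
+ *            break;
+ *        ... visit the link record for 'cur' ...
+ *        cur = next;
+ *    }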
*/ + +extern int cat_check_link_ancestry( + struct hfsmount *hfsmp, + cnid_t parentid, + cnid_t pointed_at_cnid); + +extern int cat_set_childlinkbit( + struct hfsmount *hfsmp, + cnid_t cnid); + +#define HFS_IGNORABLE_LINK 0x00000001 + +extern int cat_resolvelink( struct hfsmount *hfsmp, + u_int32_t linkref, + int isdirlink, + struct HFSPlusCatalogFile *recp); + +extern int cat_createlink( struct hfsmount *hfsmp, + struct cat_desc *descp, + struct cat_attr *attr, + cnid_t nextlinkid, + cnid_t *linkfileid); + +/* Finder Info's file type and creator for directory hard link alias */ +enum { + kHFSAliasType = 0x66647270, /* 'fdrp' */ + kHFSAliasCreator = 0x4D414353 /* 'MACS' */ +}; + +extern int cat_deletelink( struct hfsmount *hfsmp, + struct cat_desc *descp); + +extern int cat_update_siblinglinks( struct hfsmount *hfsmp, + cnid_t linkfileid, + cnid_t prevlinkid, + cnid_t nextlinkid); + +extern int cat_lookuplink( struct hfsmount *hfsmp, + struct cat_desc *descp, + cnid_t *linkfileid, + cnid_t *prevlinkid, + cnid_t *nextlinkid); + +extern int cat_lookup_siblinglinks( struct hfsmount *hfsmp, + cnid_t linkfileid, + cnid_t *prevlinkid, + cnid_t *nextlinkid); + +extern int cat_lookup_lastlink( struct hfsmount *hfsmp, + cnid_t startid, + cnid_t *nextlinkid, + struct cat_desc *cdesc); + +extern int cat_lookup_dirlink(struct hfsmount *hfsmp, + cnid_t dirlink_id, + u_int8_t forktype, + struct cat_desc *outdescp, + struct cat_attr *attrp, + struct cat_fork *forkp); + +extern int cat_update_dirlink(struct hfsmount *hfsmp, + u_int8_t forktype, + struct cat_desc *descp, + struct cat_attr *attrp, + struct cat_fork *rsrcforkp); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __HFS_CATALOG__ */ diff --git a/core/hfs_chash.c b/core/hfs_chash.c new file mode 100644 index 0000000..5fe0bc3 --- /dev/null +++ b/core/hfs_chash.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hfs_chash.c + * derived from @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" /* XXX bringup */ +#include "hfs_cnode.h" + +extern lck_attr_t * hfs_lock_attr; +extern lck_grp_t * hfs_mutex_group; +extern lck_grp_t * hfs_rwlock_group; + +lck_grp_t * chash_lck_grp; +lck_grp_attr_t * chash_lck_grp_attr; +lck_attr_t * chash_lck_attr; + +#define CNODEHASH(hfsmp, inum) (&hfsmp->hfs_cnodehashtbl[(inum) & hfsmp->hfs_cnodehash]) + +/* + * Initialize cnode hash table. + */ +void +hfs_chashinit() +{ + chash_lck_grp_attr= lck_grp_attr_alloc_init(); + chash_lck_grp = lck_grp_alloc_init("cnode_hash", chash_lck_grp_attr); + chash_lck_attr = lck_attr_alloc_init(); +} + +static void hfs_chash_lock(struct hfsmount *hfsmp) +{ + lck_mtx_lock(&hfsmp->hfs_chash_mutex); +} + +static void hfs_chash_lock_spin(struct hfsmount *hfsmp) +{ + lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex); +} + +static void hfs_chash_lock_convert(struct hfsmount *hfsmp) +{ + lck_mtx_convert_spin(&hfsmp->hfs_chash_mutex); +} + +static void hfs_chash_unlock(struct hfsmount *hfsmp) +{ + lck_mtx_unlock(&hfsmp->hfs_chash_mutex); +} + +void +hfs_chashinit_finish(struct hfsmount *hfsmp) +{ + lck_mtx_init(&hfsmp->hfs_chash_mutex, chash_lck_grp, chash_lck_attr); + + hfsmp->hfs_cnodehashtbl = hashinit(desiredvnodes / 4, M_TEMP, &hfsmp->hfs_cnodehash); +} + +void +hfs_delete_chash(struct hfsmount *hfsmp) +{ + lck_mtx_destroy(&hfsmp->hfs_chash_mutex, chash_lck_grp); + + FREE(hfsmp->hfs_cnodehashtbl, M_TEMP); +} + + +/* + * Use the device, inum pair to find the incore cnode. + * + * If it is in core, but locked, wait for it. 
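+ *
+ * Callers get back either NULL (no matching cnode or fork vnode in the
+ * hash, the vnode's identity changed, or the item is open-unlinked and
+ * allow_deleted is 0) or a vnode carrying an iocount with, unless skiplock
+ * was set, the cnode locked exclusive.  A minimal caller sketch
+ * (illustrative only):
+ *
+ *    vp = hfs_chash_getvnode(hfsmp, inum, wantrsrc, skiplock, allow_deleted);
+ *    if (vp == NULL) {
+ *        ... not cached: fall back to a catalog lookup + hfs_getnewvnode ...
+ *    }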
+ */ +struct vnode * +hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock, int allow_deleted) +{ + struct cnode *cp; + struct vnode *vp; + int error; + u_int32_t vid; + + /* + * Go through the hash list + * If a cnode is in the process of being cleaned out or being + * allocated, wait for it to be finished and then try again. + */ +loop: + hfs_chash_lock_spin(hfsmp); + + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) + continue; + /* Wait if cnode is being created or reclaimed. */ + if (ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { + SET(cp->c_hflag, H_WAITING); + + (void) msleep(cp, &hfsmp->hfs_chash_mutex, PDROP | PINOD, + "hfs_chash_getvnode", 0); + goto loop; + } + /* Obtain the desired vnode. */ + vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; + if (vp == NULLVP) + goto exit; + + vid = vnode_vid(vp); + hfs_chash_unlock(hfsmp); + + if ((error = vnode_getwithvid(vp, vid))) { + /* + * If vnode is being reclaimed, or has + * already changed identity, no need to wait + */ + return (NULL); + } + if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + vnode_put(vp); + return (NULL); + } + + /* + * Skip cnodes that are not in the name space anymore + * we need to check with the cnode lock held because + * we may have blocked acquiring the vnode ref or the + * lock on the cnode which would allow the node to be + * unlinked + */ + if (!allow_deleted) { + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + if (!skiplock) { + hfs_unlock(cp); + } + vnode_put(vp); + return (NULL); + } + } + return (vp); + } +exit: + hfs_chash_unlock(hfsmp); + return (NULL); +} + + +/* + * Use the device, fileid pair to snoop an incore cnode. + * + * A cnode can exists in chash even after it has been + * deleted from the catalog, so this function returns + * ENOENT if C_NOEXIST is set in the cnode's flag. + * + */ +int +hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int existence_only, + int (*callout)(const cnode_t *cp, void *), void * arg) +{ + struct cnode *cp; + int result = ENOENT; + + /* + * Go through the hash list + * If a cnode is in the process of being cleaned out or being + * allocated, wait for it to be finished and then try again. + */ + hfs_chash_lock(hfsmp); + + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) + continue; + + /* + * Under normal circumstances, we would want to return ENOENT if a cnode is in + * the hash and it is marked C_NOEXISTS or C_DELETED. However, if the CNID + * namespace has wrapped around, then we have the possibility of collisions. + * In that case, we may use this function to validate whether or not we + * should trust the nextCNID value in the hfs mount point. + * + * If we didn't do this, then it would be possible for a cnode that is no longer backed + * by anything on-disk (C_NOEXISTS) to still exist in the hash along with its + * vnode. The cat_create routine could then create a new entry in the catalog + * re-using that CNID. Then subsequent hfs_getnewvnode calls will repeatedly fail + * trying to look it up/validate it because it is marked C_NOEXISTS. So we want + * to prevent that from happening as much as possible. + */ + if (existence_only) { + result = 0; + break; + } + + /* Skip cnodes that have been removed from the catalog */ + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + result = EACCES; + break; + } + + /* Skip cnodes being created or reclaimed. 
*/ + if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { + result = callout(cp, arg); + } + break; + } + hfs_chash_unlock(hfsmp); + + return (result); +} + +/* + * Use the device, fileid pair to find the incore cnode. + * If no cnode if found one is created + * + * If it is in core, but locked, wait for it. + * + * If the cnode is C_DELETED, then return NULL since that + * inum is no longer valid for lookups (open-unlinked file). + * + * If the cnode is C_DELETED but also marked C_RENAMED, then that means + * the cnode was renamed over and a new entry exists in its place. The caller + * should re-drive the lookup to get the newer entry. In that case, we'll still + * return NULL for the cnode, but also return GNV_CHASH_RENAMED in the output flags + * of this function to indicate the caller that they should re-drive. + */ +struct cnode * +hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, + int wantrsrc, int skiplock, int *out_flags, int *hflags) +{ + struct cnode *cp; + struct cnode *ncp = NULL; + vnode_t vp; + u_int32_t vid; + + /* + * Go through the hash list + * If a cnode is in the process of being cleaned out or being + * allocated, wait for it to be finished and then try again. + */ +loop: + hfs_chash_lock_spin(hfsmp); + +loop_with_lock: + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) + continue; + /* + * Wait if cnode is being created, attached to or reclaimed. + */ + if (ISSET(cp->c_hflag, H_ALLOC | H_ATTACH | H_TRANSIT)) { + SET(cp->c_hflag, H_WAITING); + + (void) msleep(cp, &hfsmp->hfs_chash_mutex, PINOD, + "hfs_chash_getcnode", 0); + goto loop_with_lock; + } + vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; + if (vp == NULL) { + /* + * The desired vnode isn't there so tag the cnode. + */ + SET(cp->c_hflag, H_ATTACH); + *hflags |= H_ATTACH; + + hfs_chash_unlock(hfsmp); + } else { + vid = vnode_vid(vp); + + hfs_chash_unlock(hfsmp); + + if (vnode_getwithvid(vp, vid)) + goto loop; + } + if (ncp) { + /* + * someone else won the race to create + * this cnode and add it to the hash + * just dump our allocation + */ + hfs_zfree(ncp, HFS_CNODE_ZONE); + ncp = NULL; + } + + if (!skiplock) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + + /* + * Skip cnodes that are not in the name space anymore + * we need to check with the cnode lock held because + * we may have blocked acquiring the vnode ref or the + * lock on the cnode which would allow the node to be + * unlinked. + * + * Don't return a cnode in this case since the inum + * is no longer valid for lookups. + */ + if ((cp->c_flag & (C_NOEXISTS | C_DELETED)) && !wantrsrc) { + int renamed = 0; + if (cp->c_flag & C_RENAMED) { + renamed = 1; + } + if (!skiplock) + hfs_unlock(cp); + if (vp != NULLVP) { + vnode_put(vp); + } else { + hfs_chash_lock_spin(hfsmp); + CLR(cp->c_hflag, H_ATTACH); + *hflags &= ~H_ATTACH; + if (ISSET(cp->c_hflag, H_WAITING)) { + CLR(cp->c_hflag, H_WAITING); + wakeup((caddr_t)cp); + } + hfs_chash_unlock(hfsmp); + } + vp = NULL; + cp = NULL; + if (renamed) { + *out_flags = GNV_CHASH_RENAMED; + } + } + *vpp = vp; + return (cp); + } + + /* + * Allocate a new cnode + */ + if (skiplock && !wantrsrc) + panic("%s - should never get here when skiplock is set \n", __FUNCTION__); + + if (ncp == NULL) { + hfs_chash_unlock(hfsmp); + + ncp = hfs_zalloc(HFS_CNODE_ZONE); + + /* + * since we dropped the chash lock, + * we need to go back and re-verify + * that this node hasn't come into + * existence... 
+ */ + goto loop; + } + hfs_chash_lock_convert(hfsmp); + +#if HFS_MALLOC_DEBUG + bzero(ncp, __builtin_offsetof(struct cnode, magic)); +#else + bzero(ncp, sizeof(*ncp)); +#endif + + SET(ncp->c_hflag, H_ALLOC); + *hflags |= H_ALLOC; + ncp->c_fileid = inum; + TAILQ_INIT(&ncp->c_hintlist); /* make the list empty */ + TAILQ_INIT(&ncp->c_originlist); + + lck_rw_init(&ncp->c_rwlock, hfs_rwlock_group, hfs_lock_attr); + if (!skiplock) + (void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + /* Insert the new cnode with it's H_ALLOC flag set */ + LIST_INSERT_HEAD(CNODEHASH(hfsmp, inum), ncp, c_hash); + hfs_chash_unlock(hfsmp); + + *vpp = NULL; + return (ncp); +} + + +void +hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int hflags) +{ + hfs_chash_lock_spin(hfsmp); + + CLR(cp->c_hflag, hflags); + + if (ISSET(cp->c_hflag, H_WAITING)) { + CLR(cp->c_hflag, H_WAITING); + wakeup((caddr_t)cp); + } + hfs_chash_unlock(hfsmp); +} + + +/* + * Re-hash two cnodes in the hash table. + */ +void +hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2) +{ + hfs_chash_lock_spin(hfsmp); + + LIST_REMOVE(cp1, c_hash); + LIST_REMOVE(cp2, c_hash); + LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp1->c_fileid), cp1, c_hash); + LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp2->c_fileid), cp2, c_hash); + + hfs_chash_unlock(hfsmp); +} + + +/* + * Remove a cnode from the hash table. + */ +int +hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp) +{ + hfs_chash_lock_spin(hfsmp); + + /* Check if a vnode is getting attached */ + if (ISSET(cp->c_hflag, H_ATTACH)) { + hfs_chash_unlock(hfsmp); + return (EBUSY); + } + if (cp->c_hash.le_next || cp->c_hash.le_prev) { + LIST_REMOVE(cp, c_hash); + cp->c_hash.le_next = NULL; + cp->c_hash.le_prev = NULL; + } + hfs_chash_unlock(hfsmp); + + return (0); +} + +/* + * Remove a cnode from the hash table and wakeup any waiters. + */ +void +hfs_chash_abort(struct hfsmount *hfsmp, struct cnode *cp) +{ + hfs_chash_lock_spin(hfsmp); + + LIST_REMOVE(cp, c_hash); + cp->c_hash.le_next = NULL; + cp->c_hash.le_prev = NULL; + + CLR(cp->c_hflag, H_ATTACH | H_ALLOC); + if (ISSET(cp->c_hflag, H_WAITING)) { + CLR(cp->c_hflag, H_WAITING); + wakeup((caddr_t)cp); + } + hfs_chash_unlock(hfsmp); +} + + +/* + * mark a cnode as in transition + */ +void +hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp) +{ + hfs_chash_lock_spin(hfsmp); + + SET(cp->c_hflag, H_TRANSIT); + + hfs_chash_unlock(hfsmp); +} + +/* Search a cnode in the hash. This function does not return cnode which + * are getting created, destroyed or in transition. Note that this function + * does not acquire the cnode hash mutex, and expects the caller to acquire it. + * On success, returns pointer to the cnode found. On failure, returns NULL. + */ +static +struct cnode * +hfs_chash_search_cnid(struct hfsmount *hfsmp, cnid_t cnid) +{ + struct cnode *cp; + + for (cp = CNODEHASH(hfsmp, cnid)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid == cnid) { + break; + } + } + + /* If cnode is being created or reclaimed, return error. */ + if (cp && ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { + cp = NULL; + } + + return cp; +} + +/* Search a cnode corresponding to given device and ID in the hash. If the + * found cnode has kHFSHasChildLinkBit cleared, set it. If the cnode is not + * found, no new cnode is created and error is returned. + * + * Return values - + * -1 : The cnode was not found. + * 0 : The cnode was found, and the kHFSHasChildLinkBit was already set. 
+ * 1 : The cnode was found, the kHFSHasChildLinkBit was not set, and the + * function had to set that bit. + */ +int +hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid) +{ + int retval = -1; + struct cnode *cp; + + hfs_chash_lock_spin(hfsmp); + + cp = hfs_chash_search_cnid(hfsmp, cnid); + if (cp) { + if (cp->c_attr.ca_recflags & kHFSHasChildLinkMask) { + retval = 0; + } else { + cp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + retval = 1; + } + } + hfs_chash_unlock(hfsmp); + + return retval; +} diff --git a/core/hfs_cnode.c b/core/hfs_cnode.c new file mode 100644 index 0000000..12b126c --- /dev/null +++ b/core/hfs_cnode.c @@ -0,0 +1,2561 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_quota.h" +#include "hfs_format.h" +#include "hfs_kdebug.h" +#include "hfs_cprotect.h" + +extern int prtactive; + +extern lck_attr_t * hfs_lock_attr; +extern lck_grp_t * hfs_mutex_group; +extern lck_grp_t * hfs_rwlock_group; + +static void hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *); +static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim); +static int hfs_isordered(struct cnode *, struct cnode *); + +extern int hfs_removefile_callback(struct buf *bp, void *hfsmp); + + +__inline__ int hfs_checkdeleted (struct cnode *cp) { + return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0); +} + +/* + * Function used by a special fcntl() that decorates a cnode/vnode that + * indicates it is backing another filesystem, like a disk image. + * + * the argument 'val' indicates whether or not to set the bit in the cnode flags + * + * Returns non-zero on failure. 
0 on success + */ +int hfs_set_backingstore (struct vnode *vp, int val) { + struct cnode *cp = NULL; + int err = 0; + + cp = VTOC(vp); + if (!vnode_isreg(vp) && !vnode_isdir(vp)) { + return EINVAL; + } + + /* lock the cnode */ + err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (err) { + return err; + } + + if (val) { + cp->c_flag |= C_BACKINGSTORE; + } + else { + cp->c_flag &= ~C_BACKINGSTORE; + } + + /* unlock everything */ + hfs_unlock (cp); + + return err; +} + +/* + * Function used by a special fcntl() that check to see if a cnode/vnode + * indicates it is backing another filesystem, like a disk image. + * + * the argument 'val' is an output argument for whether or not the bit is set + * + * Returns non-zero on failure. 0 on success + */ + +int hfs_is_backingstore (struct vnode *vp, int *val) { + struct cnode *cp = NULL; + int err = 0; + + if (!vnode_isreg(vp) && !vnode_isdir(vp)) { + *val = 0; + return 0; + } + + cp = VTOC(vp); + + /* lock the cnode */ + err = hfs_lock (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (err) { + return err; + } + + if (cp->c_flag & C_BACKINGSTORE) { + *val = 1; + } + else { + *val = 0; + } + + /* unlock everything */ + hfs_unlock (cp); + + return err; +} + + +/* + * hfs_cnode_teardown + * + * This is an internal function that is invoked from both hfs_vnop_inactive + * and hfs_vnop_reclaim. As VNOP_INACTIVE is not necessarily called from vnodes + * being recycled and reclaimed, it is important that we do any post-processing + * necessary for the cnode in both places. Important tasks include things such as + * releasing the blocks from an open-unlinked file when all references to it have dropped, + * and handling resource forks separately from data forks. + * + * Note that we take only the vnode as an argument here (rather than the cnode). + * Recall that each cnode supports two forks (rsrc/data), and we can always get the right + * cnode from either of the vnodes, but the reverse is not true -- we can't determine which + * vnode we need to reclaim if only the cnode is supplied. + * + * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim + * if both are invoked right after the other. In the second call, most of this function's if() + * conditions will fail, since they apply generally to cnodes still marked with C_DELETED. + * As a quick check to see if this function is necessary, determine if the cnode is already + * marked C_NOEXISTS. If it is, then it is safe to skip this function. The only tasks that + * remain for cnodes marked in such a fashion is to teardown their fork references and + * release all directory hints and hardlink origins. However, both of those are done + * in hfs_vnop_reclaim. hfs_update, by definition, is not necessary if the cnode's catalog + * entry is no longer there. + * + * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim. If we are + * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info + * is totally gone by that point. + * + * Assumes that both truncate and cnode locks for 'cp' are held. 
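+ *
+ * In sketch form, the calling convention (mirroring what hfs_vnop_inactive
+ * below does for regular files and symlinks; the reclaim path passes
+ * reclaim == 1 and does not take the truncate lock):
+ *
+ *    hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ *    hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
+ *    error = hfs_cnode_teardown(vp, ctx, 0);
+ *    hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ *    hfs_unlock(cp);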
+ */ +static +int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) +{ + int forkcount = 0; + enum vtype v_type; + struct cnode *cp; + int error = 0; + bool started_tr = false; + struct hfsmount *hfsmp = VTOHFS(vp); + struct proc *p = vfs_context_proc(ctx); + int truncated = 0; + cat_cookie_t cookie; + int cat_reserve = 0; + int lockflags; + int ea_error = 0; + + v_type = vnode_vtype(vp); + cp = VTOC(vp); + + if (cp->c_datafork) { + ++forkcount; + } + if (cp->c_rsrcfork) { + ++forkcount; + } + + /* + * Push file data out for normal files that haven't been evicted from + * the namespace. We only do this if this function was not called from reclaim, + * because by that point the UBC information has been totally torn down. + * + * There should also be no way that a normal file that has NOT been deleted from + * the namespace to skip INACTIVE and go straight to RECLAIM. That race only happens + * when the file becomes open-unlinked. + */ + if ((v_type == VREG) && + (!ISSET(cp->c_flag, C_DELETED)) && + (!ISSET(cp->c_flag, C_NOEXISTS)) && + (VTOF(vp)->ff_blocks) && + (reclaim == 0)) { + /* + * If we're called from hfs_vnop_inactive, all this means is at the time + * the logic for deciding to call this function, there were not any lingering + * mmap/fd references for this file. However, there is nothing preventing the system + * from creating a new reference in between the time that logic was checked + * and we entered hfs_vnop_inactive. As a result, the only time we can guarantee + * that there aren't any references is during vnop_reclaim. + */ + hfs_filedone(vp, ctx, 0); + } + + /* + * Remove any directory hints or cached origins + */ + if (v_type == VDIR) { + hfs_reldirhints(cp, 0); + } + if (cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); + } + + /* + * -- Handle open unlinked files -- + * + * If the vnode is in use, it means a force unmount is in progress + * in which case we defer cleaning up until either we come back + * through here via hfs_vnop_reclaim, at which point the UBC + * information will have been torn down and the vnode might no + * longer be in use, or if it's still in use, it will get cleaned + * up when next remounted. + */ + if (ISSET(cp->c_flag, C_DELETED) && !vnode_isinuse(vp, 0)) { + /* + * This check is slightly complicated. We should only truncate data + * in very specific cases for open-unlinked files. This is because + * we want to ensure that the resource fork continues to be available + * if the caller has the data fork open. However, this is not symmetric; + * someone who has the resource fork open need not be able to access the data + * fork once the data fork has gone inactive. + * + * If we're the last fork, then we have cleaning up to do. + * + * A) last fork, and vp == c_vp + * Truncate away own fork data. If rsrc fork is not in core, truncate it too. + * + * B) last fork, and vp == c_rsrc_vp + * Truncate ourselves, assume data fork has been cleaned due to C). + * + * If we're not the last fork, then things are a little different: + * + * C) not the last fork, vp == c_vp + * Truncate ourselves. Once the file has gone out of the namespace, + * it cannot be further opened. Further access to the rsrc fork may + * continue, however. + * + * D) not the last fork, vp == c_rsrc_vp + * Don't enter the block below, just clean up vnode and push it out of core. + */ + + if ((v_type == VREG || v_type == VLNK) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { + + /* Truncate away our own fork data. 
(Case A, B, C above) */ + if (VTOF(vp)->ff_blocks != 0) { + /* + * SYMLINKS only: + * + * Encapsulate the entire change (including truncating the link) in + * nested transactions if we are modifying a symlink, because we know that its + * file length will be at most 4k, and we can fit both the truncation and + * any relevant bitmap changes into a single journal transaction. We also want + * the kill_block code to execute in the same transaction so that any dirty symlink + * blocks will not be written. Otherwise, rely on + * hfs_truncate doing its own transactions to ensure that we don't blow up + * the journal. + */ + if (!started_tr && (v_type == VLNK)) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + else { + started_tr = true; + } + } + + /* + * At this point, we have decided that this cnode is + * suitable for full removal. We are about to deallocate + * its blocks and remove its entry from the catalog. + * If it was a symlink, then it's possible that the operation + * which created it is still in the current transaction group + * due to coalescing. Take action here to kill the data blocks + * of the symlink out of the journal before moving to + * deallocate the blocks. We need to be in the middle of + * a transaction before calling buf_iterate like this. + * + * Note: we have to kill any potential symlink buffers out of + * the journal prior to deallocating their blocks. This is so + * that we don't race with another thread that may be doing an + * an allocation concurrently and pick up these blocks. It could + * generate I/O against them which could go out ahead of our journal + * transaction. + */ + + if (hfsmp->jnl && vnode_islnk(vp)) { + buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } + + + /* + * This truncate call (and the one below) is fine from VNOP_RECLAIM's + * context because we're only removing blocks, not zero-filling new + * ones. The C_DELETED check above makes things much simpler. + */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 0, ctx); + if (error) { + goto out; + } + truncated = 1; + + /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */ + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = false; + } + + } + + /* + * Truncate away the resource fork, if we represent the data fork and + * it is the last fork. That means, by definition, the rsrc fork is not in + * core. To avoid bringing a vnode into core for the sole purpose of deleting the + * data in the resource fork, we call cat_lookup directly, then hfs_release_storage + * to get rid of the resource fork's data. Note that because we are holding the + * cnode lock, it is impossible for a competing thread to create the resource fork + * vnode from underneath us while we do this. + * + * This is invoked via case A above only. 
+ */ + if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { + struct cat_lookup_buffer *lookup_rsrc = NULL; + struct cat_desc *desc_ptr = NULL; + lockflags = 0; + + lookup_rsrc = hfs_mallocz(sizeof(*lookup_rsrc)); + + if (cp->c_desc.cd_namelen == 0) { + /* Initialize the rsrc descriptor for lookup if necessary*/ + MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid); + + lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name; + lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name); + lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid; + + desc_ptr = &lookup_rsrc->lookup_desc; + } + else { + desc_ptr = &cp->c_desc; + } + + lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, + (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + + if (error) { + hfs_free(lookup_rsrc, sizeof(*lookup_rsrc)); + goto out; + } + + /* + * Make the filefork in our temporary struct look like a real + * filefork. Fill in the cp, sysfileinfo and rangelist fields.. + */ + rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges); + lookup_rsrc->lookup_fork.ff_cp = cp; + + /* + * If there were no errors, then we have the catalog's fork information + * for the resource fork in question. Go ahead and delete the data in it now. + */ + + error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid); + hfs_free(lookup_rsrc, sizeof(*lookup_rsrc)); + + if (error) { + goto out; + } + + /* + * This fileid's resource fork extents have now been fully deleted on-disk + * and this CNID is no longer valid. At this point, we should be able to + * zero out cp->c_blocks to indicate there is no data left in this file. + */ + cp->c_blocks = 0; + } + } + + /* + * If we represent the last fork (or none in the case of a dir), + * and the cnode has become open-unlinked... + * + * We check c_blocks here because it is possible in the force + * unmount case for the data fork to be in use but the resource + * fork to not be in use in which case we will truncate the + * resource fork, but not the data fork. It will get cleaned + * up upon next mount. + */ + if (forkcount <= 1 && !cp->c_blocks) { + /* + * If it has EA's, then we need to get rid of them. + * + * Note that this must happen outside of any other transactions + * because it starts/ends its own transactions and grabs its + * own locks. This is to prevent a file with a lot of attributes + * from creating a transaction that is too large (which panics). + */ + if (ISSET(cp->c_attr.ca_recflags, kHFSHasAttributesMask)) + ea_error = hfs_removeallattr(hfsmp, cp->c_fileid, &started_tr); + + /* + * Remove the cnode's catalog entry and release all blocks it + * may have been using. + */ + + /* + * Mark cnode in transit so that no one can get this + * cnode from cnode hash. + */ + // hfs_chash_mark_in_transit(hfsmp, cp); + // XXXdbg - remove the cnode from the hash table since it's deleted + // otherwise someone could go to sleep on the cnode and not + // be woken up until this vnode gets recycled which could be + // a very long time... 
+ hfs_chashremove(hfsmp, cp); + + cp->c_flag |= C_NOEXISTS; // XXXdbg + cp->c_rdev = 0; + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + started_tr = true; + } + + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { + goto out; + } + cat_reserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + if (cp->c_blocks > 0) { + printf("hfs_inactive: deleting non-empty%sfile %d, " + "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ", + (int)cp->c_fileid, (int)cp->c_blocks); + } + + // + // release the name pointer in the descriptor so that + // cat_delete() will use the file-id to do the deletion. + // in the case of hard links this is imperative (in the + // case of regular files the fileid and cnid are the + // same so it doesn't matter). + // + cat_releasedesc(&cp->c_desc); + + /* + * The descriptor name may be zero, + * in which case the fileid is used. + */ + error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + + if (error && truncated && (error != ENXIO)) { + printf("hfs_inactive: couldn't delete a truncated file!"); + } + + /* Update HFS Private Data dir */ + if (error == 0) { + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (vnode_isdir(vp)) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + goto out; + } + + #if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); + #endif /* QUOTA */ + + /* Already set C_NOEXISTS at the beginning of this block */ + cp->c_flag &= ~C_DELETED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + + if (error == 0) + hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0); + } + } // if + + hfs_update(vp, reclaim ? HFS_UPDATE_FORCE : 0); + + /* + * Since we are about to finish what might be an inactive call, propagate + * any remaining modified or touch bits from the cnode to the vnode. This + * serves as a hint to vnode recycling that we shouldn't recycle this vnode + * synchronously. + * + * For now, if the node *only* has a dirty atime, we don't mark + * the vnode as dirty. VFS's asynchronous recycling can actually + * lead to worse performance than having it synchronous. When VFS + * is fixed to be more performant, we can be more honest about + * marking vnodes as dirty when it's only the atime that's dirty. + */ + if (hfs_is_dirty(cp) == HFS_DIRTY || ISSET(cp->c_flag, C_DELETED)) { + vnode_setdirty(vp); + } else { + vnode_cleardirty(vp); + } + +out: + if (cat_reserve) + cat_postflight(hfsmp, &cookie, p); + + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = false; + } + + return error; +} + + +/* + * hfs_vnop_inactive + * + * The last usecount on the vnode has gone away, so we need to tear down + * any remaining data still residing in the cnode. If necessary, write out + * remaining blocks or delete the cnode's entry in the catalog. 
+ */ +int +hfs_vnop_inactive(struct vnop_inactive_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct hfsmount *hfsmp = VTOHFS(vp); + struct proc *p = vfs_context_proc(ap->a_context); + int error = 0; + int took_trunc_lock = 0; + enum vtype v_type; + + v_type = vnode_vtype(vp); + cp = VTOC(vp); + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) || + (hfsmp->hfs_freezing_proc == p)) { + error = 0; + goto inactive_done; + } + + /* + * For safety, do NOT call vnode_recycle from inside this function. This can cause + * problems in the following scenario: + * + * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE + * + * If we're being invoked as a result of a reclaim that was already in-flight, then we + * cannot call vnode_recycle again. Being in reclaim means that there are no usecounts or + * iocounts by definition. As a result, if we were to call vnode_recycle, it would immediately + * try to re-enter reclaim again and panic. + * + * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called. + * 1) last usecount goes away on the vnode (vnode_rele) + * 2) last iocount goes away on a vnode that previously had usecounts but didn't have + * vnode_recycle called (vnode_put) + * 3) vclean by way of reclaim + * + * In this function we would generally want to call vnode_recycle to speed things + * along to ensure that we don't leak blocks due to open-unlinked files. However, by + * virtue of being in this function already, we can call hfs_cnode_teardown, which + * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that + * there's no entry in the catalog and no backing store anymore. If that's the case, + * then we really don't care all that much when the vnode actually goes through reclaim. + * Further, the HFS VNOPs that manipulated the namespace in order to create the open- + * unlinked file in the first place should have already called vnode_recycle on the vnode + * to guarantee that it would go through reclaim in a speedy way. + */ + + if (cp->c_flag & C_NOEXISTS) { + /* + * If the cnode has already had its cat entry removed, then + * just skip to the end. We don't need to do anything here. + */ + error = 0; + goto inactive_done; + } + + if ((v_type == VREG || v_type == VLNK)) { + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_trunc_lock = 1; + } + + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* + * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked + * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS. + */ + error = hfs_cnode_teardown (vp, ap->a_context, 0); + + /* + * Drop the truncate lock before unlocking the cnode + * (which can potentially perform a vnode_put and + * recycle the vnode which in turn might require the + * truncate lock) + */ + if (took_trunc_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + + hfs_unlock(cp); + +inactive_done: + + return error; +} + + +/* + * File clean-up (zero fill and shrink peof). 
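+ *
+ * Here "leof" is the logical EOF (fp->ff_size) and "peof" the physical EOF,
+ * i.e. the space actually allocated to the fork.  The shrink step below is
+ * roughly equivalent to:
+ *
+ *    blks = howmany(leof, blocksize);
+ *    if (blks < fp->ff_blocks)
+ *        hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context);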
+ */ + +int +hfs_filedone(struct vnode *vp, vfs_context_t context, + hfs_file_done_opts_t opts) +{ + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + off_t leof; + u_int32_t blks, blocksize; + + cp = VTOC(vp); + fp = VTOF(vp); + hfsmp = VTOHFS(vp); + leof = fp->ff_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) + return (0); + + hfs_flush_invalid_ranges(vp); + + blocksize = VTOVCB(vp)->blockSize; + blks = leof / blocksize; + if (((off_t)blks * (off_t)blocksize) != leof) + blks++; + /* + * Shrink the peof to the smallest size neccessary to contain the leof. + */ + if (blks < fp->ff_blocks) { + (void) hfs_truncate(vp, leof, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, context); + } + + if (!ISSET(opts, HFS_FILE_DONE_NO_SYNC)) { + hfs_unlock(cp); + cluster_push(vp, IO_CLOSE); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* + * If the hfs_truncate didn't happen to flush the vnode's + * information out to disk, force it to be updated now that + * all invalid ranges have been zero-filled and validated: + */ + hfs_update(vp, 0); + } + + return (0); +} + + +/* + * Reclaim a cnode so that it can be used for other purposes. + */ +int +hfs_vnop_reclaim(struct vnop_reclaim_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp = NULL; + struct filefork *altfp = NULL; + struct hfsmount *hfsmp = VTOHFS(vp); + vfs_context_t ctx = ap->a_context; + int reclaim_cnode = 0; + int err = 0; + enum vtype v_type; + + v_type = vnode_vtype(vp); + cp = VTOC(vp); + + /* + * We don't take the truncate lock since by the time reclaim comes along, + * all dirty pages have been synced and nobody should be competing + * with us for this thread. + */ + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* + * Sync to disk any remaining data in the cnode/vnode. This includes + * a call to hfs_update if the cnode has outbound data. + * + * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do + * because the catalog entry for this cnode is already gone. + */ + if (!ISSET(cp->c_flag, C_NOEXISTS)) { + err = hfs_cnode_teardown(vp, ctx, 1); + } + + /* + * Keep track of an inactive hot file. Don't bother on ssd's since + * the tracking is done differently (it's done at read() time) + */ + if (!vnode_isdir(vp) && + !vnode_issystem(vp) && + !(cp->c_flag & (C_DELETED | C_NOEXISTS)) && + !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + (void) hfs_addhotfile(vp); + } + vnode_removefsref(vp); + + /* + * Find file fork for this vnode (if any) + * Also check if another fork is active + */ + if (cp->c_vp == vp) { + fp = cp->c_datafork; + altfp = cp->c_rsrcfork; + + cp->c_datafork = NULL; + cp->c_vp = NULL; + } else if (cp->c_rsrc_vp == vp) { + fp = cp->c_rsrcfork; + altfp = cp->c_datafork; + + cp->c_rsrcfork = NULL; + cp->c_rsrc_vp = NULL; + } else { + panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp); + } + /* + * On the last fork, remove the cnode from its hash chain. + */ + if (altfp == NULL) { + /* If we can't remove it then the cnode must persist! 
*/ + if (hfs_chashremove(hfsmp, cp) == 0) + reclaim_cnode = 1; + /* + * Remove any directory hints + */ + if (vnode_isdir(vp)) { + hfs_reldirhints(cp, 0); + } + + if(cp->c_flag & C_HARDLINK) { + hfs_relorigins(cp); + } + } + /* Release the file fork and related data */ + if (fp) { + /* Dump cached symlink data */ + if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) { + hfs_free(fp->ff_symlinkptr, fp->ff_size); + } + rl_remove_all(&fp->ff_invalidranges); + hfs_zfree(fp, HFS_FILEFORK_ZONE); + } + + /* + * If there was only one active fork then we can release the cnode. + */ + if (reclaim_cnode) { + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); + hfs_unlock(cp); + hfs_reclaim_cnode(hfsmp, cp); + } + else { + /* + * cnode in use. If it is a directory, it could have + * no live forks. Just release the lock. + */ + hfs_unlock(cp); + } + + vnode_clearfsnode(vp); + return (0); +} + + +extern int (**hfs_vnodeop_p) (void *); +#if FIFO +extern int (**hfs_fifoop_p) (void *); +#endif + +#if CONFIG_HFS_STD +extern int (**hfs_std_vnodeop_p) (void *); +#endif + +/* + * hfs_getnewvnode - get new default vnode + * + * The vnode is returned with an iocount and the cnode locked. + * The cnode of the parent vnode 'dvp' may or may not be locked, depending on + * the circumstances. The cnode in question (if acquiring the resource fork), + * may also already be locked at the time we enter this function. + * + * Note that there are both input and output flag arguments to this function. + * If one of the input flags (specifically, GNV_USE_VP), is set, then + * hfs_getnewvnode will use the parameter *vpp, which is traditionally only + * an output parameter, as both an input and output parameter. It will use + * the vnode provided in the output, and pass it to vnode_create with the + * proper flavor so that a new vnode is _NOT_ created on our behalf when + * we dispatch to VFS. This may be important in various HFS vnode creation + * routines, such a create or get-resource-fork, because we risk deadlock if + * jetsam is involved. + * + * Deadlock potential exists if jetsam is synchronously invoked while we are waiting + * for a vnode to be recycled in order to give it the identity we want. If jetsam + * happens to target a process for termination that is blocked in-kernel, waiting to + * acquire the cnode lock on our parent 'dvp', while our current thread has it locked, + * neither side will make forward progress and the watchdog timer will eventually fire. + * To prevent this, a caller of hfs_getnewvnode may choose to proactively force + * any necessary vnode reclamation/recycling while it is not holding any locks and + * thus not prone to deadlock. If this is the case, GNV_USE_VP will be set and + * the parameter will be used as described above. + * + * !!! !!!! + * In circumstances when GNV_USE_VP is set, this function _MUST_ clean up and either consume + * or dispose of the provided vnode. We funnel all errors to a single return value so that + * if provided_vp is still non-NULL, then we will dispose of the vnode. This will occur in + * all error cases of this function -- anywhere we zero/NULL out the *vpp parameter. It may + * also occur if the current thread raced with another to create the same vnode, and we + * find the entry already present in the cnode hash. + * !!! !!! 
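+ *
+ * A caller-side sketch of the GNV_USE_VP convention described above
+ * ('spare_vp' stands for a vnode the caller obtained ahead of time,
+ * outside of any cnode locks):
+ *
+ *    vp = spare_vp;
+ *    error = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, flags | GNV_USE_VP,
+ *                            &attr, &fork, &vp, &out_flags);
+ *    // on success, vp is the attached vnode with an iocount held;
+ *    // on any error, vp is NULL and the spare vnode has already been
+ *    // consumed or disposed of by hfs_getnewvnode itself.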
+ */ +int +hfs_getnewvnode( + struct hfsmount *hfsmp, + struct vnode *dvp, + struct componentname *cnp, + struct cat_desc *descp, + int flags, + struct cat_attr *attrp, + struct cat_fork *forkp, + struct vnode **vpp, + int *out_flags) +{ + struct mount *mp = HFSTOVFS(hfsmp); + struct vnode *vp = NULL; + struct vnode **cvpp; + struct vnode *tvp = NULLVP; + struct cnode *cp = NULL; + struct filefork *fp = NULL; + int hfs_standard = 0; + int retval = 0; + int issystemfile; + int wantrsrc; + int hflags = 0; + int need_update_identity = 0; + struct vnode_fsparam vfsp; + enum vtype vtype; + + struct vnode *provided_vp = NULL; + + +#if QUOTA + int i; +#endif /* QUOTA */ + + hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD); + + if (flags & GNV_USE_VP) { + /* Store the provided VP for later use */ + provided_vp = *vpp; + } + + /* Zero out the vpp regardless of provided input */ + *vpp = NULL; + + /* Zero out the out_flags */ + *out_flags = 0; + + if (attrp->ca_fileid == 0) { + retval = ENOENT; + goto gnv_exit; + } + +#if !FIFO + if (IFTOVT(attrp->ca_mode) == VFIFO) { + retval = ENOTSUP; + goto gnv_exit; + } +#endif /* !FIFO */ + vtype = IFTOVT(attrp->ca_mode); + issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG); + wantrsrc = flags & GNV_WANTRSRC; + + /* Sanity checks: */ + if (vtype == VBAD || + (vtype != VDIR && forkp && + (attrp->ca_blocks < forkp->cf_blocks || + howmany((uint64_t)forkp->cf_size, hfsmp->blockSize) > forkp->cf_blocks || + (vtype == VLNK && (uint64_t)forkp->cf_size > MAXPATHLEN)))) { + /* Mark the FS as corrupt and bail out */ + hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED); + retval = EINVAL; + goto gnv_exit; + } + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * The only case where it's permissible to hold the parent cnode + * lock is during a create operation (hfs_makenode) or when + * we don't need the cnode lock (GNV_SKIPLOCK). + */ + if ((dvp != NULL) && + (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 && + VTOC(dvp)->c_lockowner == current_thread()) { + panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp)); + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + /* + * Get a cnode (new or existing) + */ + cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, + (flags & GNV_SKIPLOCK), out_flags, &hflags); + + /* + * If the id is no longer valid for lookups we'll get back a NULL cp. + */ + if (cp == NULL) { + retval = ENOENT; + goto gnv_exit; + } + /* + * We may have been provided a vnode via + * GNV_USE_VP. In this case, we have raced with + * a 2nd thread to create the target vnode. The provided + * vnode that was passed in will be dealt with at the + * end of the function, as we don't zero out the field + * until we're ready to pass responsibility to VFS. + */ + + + /* + * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the + * descriptor in the cnode as needed if the cnode represents a hardlink. + * We want the caller to get the most up-to-date copy of the descriptor + * as possible. However, we only do anything here if there was a valid vnode. + * If there isn't a vnode, then the cnode is brand new and needs to be initialized + * as it doesn't have a descriptor or cat_attr yet. + * + * If we are about to replace the descriptor with the user-supplied one, then validate + * that the descriptor correctly acknowledges this item is a hardlink. We could be + * subject to a race where the calling thread invoked cat_lookup, got a valid lookup + * result but the file was not yet a hardlink. 
With sufficient delay between there + * and here, we might accidentally copy in the raw inode ID into the descriptor in the + * call below. If the descriptor's CNID is the same as the fileID then it must + * not yet have been a hardlink when the lookup occurred. + */ + + if (!(hfs_checkdeleted(cp))) { + // + // If the bytes of the filename in the descp do not match the bytes in the + // cnp (and we're not looking up the resource fork), then we want to update + // the vnode identity to contain the bytes that HFS stores so that when an + // fsevent gets generated, it has the correct filename. otherwise daemons + // that match filenames produced by fsevents with filenames they have stored + // elsewhere (e.g. bladerunner, backupd, mds), the filenames will not match. + // See: FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. + // +#ifdef CN_WANTSRSRCFORK + if (*vpp && cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#else + if (*vpp && cnp && cnp->cn_nameptr && descp && descp->cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)descp->cd_nameptr, descp->cd_namelen) != 0) { +#endif + vnode_update_identity (*vpp, dvp, (const char *)descp->cd_nameptr, descp->cd_namelen, 0, VNODE_UPDATE_NAME); + } + if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { + /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */ + if ((descp->cd_cnid == cp->c_attr.ca_fileid) && + (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){ + + if ((flags & GNV_SKIPLOCK) == 0) { + /* + * Then we took the lock. Drop it before calling + * vnode_put, which may invoke hfs_vnop_inactive and need to take + * the cnode lock again. + */ + hfs_unlock(cp); + } + + /* + * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to + * force a re-drive in the lookup routine. + * Drop the iocount on the vnode obtained from + * chash_getcnode if needed. + */ + if (*vpp != NULL) { + vnode_put (*vpp); + *vpp = NULL; + } + + /* + * If we raced with VNOP_RECLAIM for this vnode, the hash code could + * have observed it after the c_vp or c_rsrc_vp fields had been torn down; + * the hash code peeks at those fields without holding the cnode lock because + * it needs to be fast. As a result, we may have set H_ATTACH in the chash + * call above. Since we're bailing out, unset whatever flags we just set, and + * wake up all waiters for this cnode. + */ + if (hflags) { + hfs_chashwakeup(hfsmp, cp, hflags); + } + + *out_flags = GNV_CAT_ATTRCHANGED; + retval = ERECYCLE; + goto gnv_exit; + } + else { + /* + * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor. + * + * Replacing the descriptor here is fine because we looked up the item without + * a vnode in hand before. If a vnode existed, its identity must be attached to this + * item. We are not susceptible to the lookup fastpath issue at this point. + */ + replace_desc(cp, descp); + + /* + * This item was a hardlink, and its name needed to be updated. By replacing the + * descriptor above, we've now updated the cnode's internal representation of + * its link ID/CNID, parent ID, and its name. However, VFS must now be alerted + * to the fact that this vnode now has a new parent, since we cannot guarantee + * that the new link lived in the same directory as the alternative name for + * this item. 
+ */ + if ((*vpp != NULL) && (cnp || cp->c_desc.cd_nameptr)) { + /* we could be requesting the rsrc of a hardlink file... */ +#ifdef CN_WANTSRSRCFORK + if (cp->c_desc.cd_nameptr && (cnp == NULL || !(cnp->cn_flags & CN_WANTSRSRCFORK))) { +#else + if (cp->c_desc.cd_nameptr) { +#endif + // + // Update the identity with what we have stored on disk as + // the name of this file. This is related to: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + vnode_update_identity (*vpp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } else if (cnp) { + vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } + } + } + } + } + + /* + * At this point, we have performed hardlink and open-unlinked checks + * above. We have now validated the state of the vnode that was given back + * to us from the cnode hash code and find it safe to return. + */ + if (*vpp != NULL) { + retval = 0; + goto gnv_exit; + } + + /* + * If this is a new cnode then initialize it. + */ + if (ISSET(cp->c_hflag, H_ALLOC)) { + lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr); +#if HFS_COMPRESSION + cp->c_decmp = NULL; +#endif + + /* Make sure its still valid (ie exists on disk). */ + if (!(flags & GNV_CREATE)) { + int error = 0; + if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) { + hfs_chash_abort(hfsmp, cp); + if ((flags & GNV_SKIPLOCK) == 0) { + hfs_unlock(cp); + } + hfs_reclaim_cnode(hfsmp, cp); + *vpp = NULL; + /* + * If we hit this case, that means that the entry was there in the catalog when + * we did a cat_lookup earlier. Think hfs_lookup. However, in between the time + * that we checked the catalog and the time we went to get a vnode/cnode for it, + * it had been removed from the namespace and the vnode totally reclaimed. As a result, + * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out + * an ENOENT. To indicate to the caller that they should really double-check the + * entry (it could have been renamed over and gotten a new fileid), we mark a bit + * in the output flags. + */ + if (error == ENOENT) { + *out_flags = GNV_CAT_DELETED; + retval = ENOENT; + goto gnv_exit; + } + + /* + * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into + * this function as an argument because the catalog may have changed w.r.t hardlink + * link counts and the firstlink field. If that validation check fails, then let + * lookup re-drive itself to get valid/consistent data with the same failure condition below. + */ + if (error == ERECYCLE) { + *out_flags = GNV_CAT_ATTRCHANGED; + retval = ERECYCLE; + goto gnv_exit; + } + } + } + bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr)); + bcopy(descp, &cp->c_desc, sizeof(struct cat_desc)); + + /* The name was inherited so clear descriptor state... */ + descp->cd_namelen = 0; + descp->cd_nameptr = NULL; + descp->cd_flags &= ~CD_HASBUF; + + /* Tag hardlinks */ + if ((vtype == VREG || vtype == VDIR + || vtype == VSOCK || vtype == VFIFO) + && (descp->cd_cnid != attrp->ca_fileid + || ISSET(attrp->ca_recflags, kHFSHasLinkChainMask))) { + cp->c_flag |= C_HARDLINK; + } + /* + * Fix-up dir link counts. + * + * Earlier versions of Leopard used ca_linkcount for posix + * nlink support (effectively the sub-directory count + 2). 
+ * That is now accomplished using the ca_dircount field with + * the corresponding kHFSHasFolderCountMask flag. + * + * For directories the ca_linkcount is the true link count, + * tracking the number of actual hardlinks to a directory. + * + * We only do this if the mount has HFS_FOLDERCOUNT set; + * at the moment, we only set that for HFSX volumes. + */ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && + (vtype == VDIR) && + !(attrp->ca_recflags & kHFSHasFolderCountMask) && + (cp->c_attr.ca_linkcount > 1)) { + if (cp->c_attr.ca_entries == 0) + cp->c_attr.ca_dircount = 0; + else + cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2; + + cp->c_attr.ca_linkcount = 1; + cp->c_attr.ca_recflags |= kHFSHasFolderCountMask; + if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) ) + cp->c_flag |= C_MODIFIED; + } +#if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) { + for (i = 0; i < MAXQUOTAS; i++) + cp->c_dquot[i] = NODQUOT; + } +#endif /* QUOTA */ + /* Mark the output flag that we're vending a new cnode */ + *out_flags |= GNV_NEW_CNODE; + } + + if (vtype == VDIR) { + if (cp->c_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (data)"); + cvpp = &cp->c_vp; + } else { + /* + * Allocate and initialize a file fork... + */ + fp = hfs_zalloc(HFS_FILEFORK_ZONE); + fp->ff_cp = cp; + if (forkp) + bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork)); + else + bzero(&fp->ff_data, sizeof(struct cat_fork)); + rl_init(&fp->ff_invalidranges); + fp->ff_sysfileinfo = 0; + + if (wantrsrc) { + if (cp->c_rsrcfork != NULL) + panic("hfs_getnewvnode: orphaned rsrc fork"); + if (cp->c_rsrc_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (rsrc)"); + cp->c_rsrcfork = fp; + cvpp = &cp->c_rsrc_vp; + if ( (tvp = cp->c_vp) != NULLVP ) + cp->c_flag |= C_NEED_DVNODE_PUT; + } else { + if (cp->c_datafork != NULL) + panic("hfs_getnewvnode: orphaned data fork"); + if (cp->c_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (data)"); + cp->c_datafork = fp; + cvpp = &cp->c_vp; + if ( (tvp = cp->c_rsrc_vp) != NULLVP) + cp->c_flag |= C_NEED_RVNODE_PUT; + } + } + if (tvp != NULLVP) { + /* + * grab an iocount on the vnode we weren't + * interested in (i.e. we want the resource fork + * but the cnode already has the data fork) + * to prevent it from being + * recycled by us when we call vnode_create + * which will result in a deadlock when we + * try to take the cnode lock in hfs_vnop_fsync or + * hfs_vnop_reclaim... vnode_get can be called here + * because we already hold the cnode lock which will + * prevent the vnode from changing identity until + * we drop it.. vnode_get will not block waiting for + * a change of state... however, it will return an + * error if the current iocount == 0 and we've already + * started to terminate the vnode... we don't need/want to + * grab an iocount in the case since we can't cause + * the fileystem to be re-entered on this thread for this vp + * + * the matching vnode_put will happen in hfs_unlock + * after we've dropped the cnode lock + */ + if ( vnode_get(tvp) != 0) + cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT); + } + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "hfs"; + if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) { + vfsp.vnfs_dvp = NULL; /* no parent for me! */ + vfsp.vnfs_cnp = NULL; /* no name for me! 
*/ + } else { + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_cnp = cnp; + } + + vfsp.vnfs_fsnode = cp; + + /* + * Special Case HFS Standard VNOPs from HFS+, since + * HFS standard is readonly/deprecated as of 10.6 + */ + +#if FIFO + if (vtype == VFIFO ) + vfsp.vnfs_vops = hfs_fifoop_p; + else +#endif + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_vops = hfs_specop_p; +#if CONFIG_HFS_STD + else if (hfs_standard) + vfsp.vnfs_vops = hfs_std_vnodeop_p; +#endif + else + vfsp.vnfs_vops = hfs_vnodeop_p; + + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_rdev = attrp->ca_rdev; + else + vfsp.vnfs_rdev = 0; + + if (forkp) + vfsp.vnfs_filesize = forkp->cf_size; + else + vfsp.vnfs_filesize = 0; + + vfsp.vnfs_flags = VNFS_ADDFSREF; +#ifdef CN_WANTSRSRCFORK + if (cnp && cnp->cn_nameptr && !(cnp->cn_flags & CN_WANTSRSRCFORK) && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#else + if (cnp && cnp->cn_nameptr && cp->c_desc.cd_nameptr && strncmp((const char *)cnp->cn_nameptr, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0) { +#endif + // + // We don't want VFS to add an entry for this vnode because the name in the + // cnp does not match the bytes stored on disk for this file. Instead we'll + // update the identity later after the vnode is created and we'll do so with + // the correct bytes for this filename. For more details, see: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // + vfsp.vnfs_flags |= VNFS_NOCACHE; + need_update_identity = 1; + } else if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE)) { + vfsp.vnfs_flags |= VNFS_NOCACHE; + } + + /* Tag system files */ + vfsp.vnfs_marksystem = issystemfile; + + /* Tag root directory */ + if (descp->cd_cnid == kHFSRootFolderID) + vfsp.vnfs_markroot = 1; + else + vfsp.vnfs_markroot = 0; + + /* + * If provided_vp was non-NULL, then it is an already-allocated (but not + * initialized) vnode. We simply need to initialize it to this identity. + * If it was NULL, then assume that we need to call vnode_create with the + * normal arguments/types. + */ + if (provided_vp) { + vp = provided_vp; + /* + * After we assign the value of provided_vp into 'vp' (so that it can be + * mutated safely by vnode_initialize), we can NULL it out. At this point, the disposal + * and handling of the provided vnode will be the responsibility of VFS, which will + * clean it up and vnode_put it properly if vnode_initialize fails. + */ + provided_vp = NULL; + + retval = vnode_initialize (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + /* See error handling below for resolving provided_vp */ + } + else { + /* Do a standard vnode_create */ + retval = vnode_create (VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + } + + /* + * We used a local variable to hold the result of vnode_create/vnode_initialize so that + * on error cases in vnode_create we won't accidentally harm the cnode's fields + */ + + if (retval) { + /* Clean up if we encountered an error */ + if (fp) { + if (fp == cp->c_datafork) + cp->c_datafork = NULL; + else + cp->c_rsrcfork = NULL; + + hfs_zfree(fp, HFS_FILEFORK_ZONE); + } + /* + * If this is a newly created cnode or a vnode reclaim + * occurred during the attachment, then cleanup the cnode. 
+ */ + if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { + hfs_chash_abort(hfsmp, cp); + hfs_reclaim_cnode(hfsmp, cp); + } + else { + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); + if ((flags & GNV_SKIPLOCK) == 0){ + hfs_unlock(cp); + } + } + *vpp = NULL; + goto gnv_exit; + } + + /* If no error, then assign the value into the cnode's fields */ + *cvpp = vp; + + vnode_settag(vp, VT_HFS); + if (cp->c_flag & C_HARDLINK) { + vnode_setmultipath(vp); + } + + if (cp->c_attr.ca_recflags & kHFSFastDevCandidateMask) { + vnode_setfastdevicecandidate(vp); + } + + if (cp->c_attr.ca_recflags & kHFSAutoCandidateMask) { + vnode_setautocandidate(vp); + } + + + + + if (vp && need_update_identity) { + // + // As above, update the name of the vnode if the bytes stored in hfs do not match + // the bytes in the cnp. See this radar: + // FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories + // for more details. + // + vnode_update_identity (vp, dvp, (const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, VNODE_UPDATE_NAME); + } + + /* + * Tag resource fork vnodes as needing an VNOP_INACTIVE + * so that any deferred removes (open unlinked files) + * have the chance to process the resource fork. + */ + if (VNODE_IS_RSRC(vp)) { + int err; + + KDBG(HFSDBG_GETNEWVNODE, kdebug_vnode(cp->c_vp), kdebug_vnode(cp->c_rsrc_vp)); + + /* Force VL_NEEDINACTIVE on this vnode */ + err = vnode_ref(vp); + if (err == 0) { + vnode_rele(vp); + } + } + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); + + /* + * Stop tracking an active hot file. + */ + if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile && !(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + (void) hfs_removehotfile(vp); + } + +#if CONFIG_PROTECT + /* Initialize the cp data structures. The key should be in place now. */ + if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) { + cp_entry_init(cp, mp); + } +#endif + + *vpp = vp; + retval = 0; + +gnv_exit: + if (provided_vp) { + /* Release our empty vnode if it was not used */ + vnode_put (provided_vp); + } + return retval; +} + + +static void +hfs_reclaim_cnode(hfsmount_t *hfsmp, struct cnode *cp) +{ +#if QUOTA + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (cp->c_dquot[i] != NODQUOT) { + dqreclaim(cp->c_dquot[i]); + cp->c_dquot[i] = NODQUOT; + } + } +#endif /* QUOTA */ + + /* + * If the descriptor has a name then release it + */ + if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) { + const char *nameptr; + + nameptr = (const char *) cp->c_desc.cd_nameptr; + cp->c_desc.cd_nameptr = 0; + cp->c_desc.cd_flags &= ~CD_HASBUF; + cp->c_desc.cd_namelen = 0; + vfs_removename(nameptr); + } + + /* + * We only call this function if we are in hfs_vnop_reclaim and + * attempting to reclaim a cnode with only one live fork. Because the vnode + * went through reclaim, any future attempts to use this item will have to + * go through lookup again, which will need to create a new vnode. Thus, + * destroying the locks below is safe. 
+ */ + + lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group); + lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group); +#if HFS_COMPRESSION + if (cp->c_decmp) { + decmpfs_cnode_destroy(cp->c_decmp); + decmpfs_cnode_free(cp->c_decmp); + } +#endif +#if CONFIG_PROTECT + cp_entry_destroy(hfsmp, cp->c_cpentry); + cp->c_cpentry = NULL; +#else + (void)hfsmp; // Prevent compiler warning +#endif + + hfs_zfree(cp, HFS_CNODE_ZONE); +} + + +/* + * hfs_valid_cnode + * + * This function is used to validate data that is stored in-core against what is contained + * in the catalog. Common uses include validating that the parent-child relationship still exist + * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at + * the point of the check. + */ +int +hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, + cnid_t cnid, struct cat_attr *cattr, int *error) +{ + struct cat_attr attr; + struct cat_desc cndesc; + int stillvalid = 0; + int lockflags; + + /* System files are always valid */ + if (cnid < kHFSFirstUserCatalogNodeID) { + *error = 0; + return (1); + } + + /* XXX optimization: check write count in dvp */ + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + if (dvp && cnp) { + int lookup = 0; + struct cat_fork fork; + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + cndesc.cd_namelen = cnp->cn_namelen; + cndesc.cd_parentcnid = VTOC(dvp)->c_fileid; + cndesc.cd_hint = VTOC(dvp)->c_childhint; + + /* + * We have to be careful when calling cat_lookup. The result argument + * 'attr' may get different results based on whether or not you ask + * for the filefork to be supplied as output. This is because cat_lookupbykey + * will attempt to do basic validation/smoke tests against the resident + * extents if there are no overflow extent records, but it needs someplace + * in memory to store the on-disk fork structures. + * + * Since hfs_lookup calls cat_lookup with a filefork argument, we should + * do the same here, to verify that block count differences are not + * due to calling the function with different styles. cat_lookupbykey + * will request the volume be fsck'd if there is true on-disk corruption + * where the number of blocks does not match the number generated by + * summing the number of blocks in the resident extents. + */ + + lookup = cat_lookup (hfsmp, &cndesc, 0, 0, NULL, &attr, &fork, NULL); + + if ((lookup == 0) && (cnid == attr.ca_fileid)) { + stillvalid = 1; + *error = 0; + } + else { + *error = ENOENT; + } + + /* + * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation + * race. Specifically, if there is no vnode/cnode pair for the directory entry + * being looked up, we have to go to the catalog. But since we don't hold any locks (aside + * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record + * changing in between the time we do the cat_lookup there and the time we re-grab the + * catalog lock above to do another cat_lookup. + * + * However, we need to check more than just the CNID and parent-child name relationships above. + * Hardlinks can suffer the same race in the following scenario: Suppose we do a + * cat_lookup, and find a leaf record and a raw inode for a hardlink. Now, we have + * the cat_attr in hand (passed in above). 
But in between then and now, the vnode was + * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get + * a chance to do anything. This is possible if there are a lot of threads thrashing around + * with the cnode hash. In this case, if we don't check/validate the cat_attr in-hand, we will + * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is + * on disk. So validate the cat_attr below, if required. This race cannot happen if the cnode/vnode + * already exists, as it does in the case of rename and delete. + */ + if (stillvalid && cattr != NULL) { + if (cattr->ca_linkcount != attr.ca_linkcount) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + + if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) { + stillvalid = 0; + *error = ERECYCLE; + goto notvalid; + } + } + } else { + if (cat_idlookup(hfsmp, cnid, 0, 0, NULL, NULL, NULL) == 0) { + stillvalid = 1; + *error = 0; + } + else { + *error = ENOENT; + } + } +notvalid: + hfs_systemfile_unlock(hfsmp, lockflags); + + return (stillvalid); +} + + +/* + * Per HI and Finder requirements, HFS should add in the + * date/time that a particular directory entry was added + * to the containing directory. + * This is stored in the extended Finder Info for the + * item in question. + * + * Note that this field is also set explicitly in the hfs_vnop_setxattr code. + * We must ignore user attempts to set this part of the finderinfo, and + * so we need to save a local copy of the date added, write in the user + * finderinfo, then stuff the value back in. + */ +void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) { + u_int8_t *finfo = NULL; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)attrp->ca_finderinfo; + finfo = finfo + 16; + + /* + * Make sure to write it out as big endian, since that's how + * finder info is defined. + * + * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp. + */ + if (S_ISREG(attrp->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + attrp->ca_recflags |= kHFSHasDateAddedMask; + } + else if (S_ISDIR(attrp->ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + attrp->ca_recflags |= kHFSHasDateAddedMask; + } + /* If it were neither directory/file, then we'd bail out */ + return; +} + +static u_int32_t +hfs_get_dateadded_internal(const uint8_t *finderinfo, mode_t mode) +{ + const uint8_t *finfo = NULL; + u_int32_t dateadded = 0; + + + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = finderinfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. 
+ */ + if (S_ISREG(mode)) { + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; + dateadded = OSSwapBigToHostInt32 (extinfo->date_added); + } + else if (S_ISDIR(mode)) { + const struct FndrExtendedDirInfo *extinfo = (const struct FndrExtendedDirInfo *)finfo; + dateadded = OSSwapBigToHostInt32 (extinfo->date_added); + } + + return dateadded; +} + +u_int32_t +hfs_get_dateadded(struct cnode *cp) +{ + if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) { + /* Date added was never set. Return 0. */ + return (0); + } + + return (hfs_get_dateadded_internal((u_int8_t*)cp->c_finderinfo, + cp->c_attr.ca_mode)); +} + +u_int32_t +hfs_get_dateadded_from_blob(const uint8_t *finderinfo, mode_t mode) +{ + return (hfs_get_dateadded_internal(finderinfo, mode)); +} + +/* + * Per HI and Finder requirements, HFS maintains a "write/generation + * count" for each file that is incremented on any write & pageout. + * It should start at 1 to reserve "0" as a special value. If it + * should ever wrap around, it will skip using 0. + * + * Note that finderinfo is manipulated in hfs_vnop_setxattr and care + * is and should be taken to ignore user attempts to set the part of + * the finderinfo that records the generation counter. + * + * Any change to the generation counter *must* not be visible before + * the change that caused it (for obvious reasons), and given the + * limitations of our current architecture, the change to the + * generation counter may occur some time afterwards (particularly in + * the case where a file is mapped writable---more on that below). + * + * We make no guarantees about the consistency of a file. In other + * words, a reader that is operating concurrently with a writer might + * see some, but not all of writer's changes, and the generation + * counter will *not* necessarily tell you this has happened. To + * enforce consistency, clients must make their own arrangements + * e.g. use file locking. + * + * We treat files that are mapped writable as a special case: when + * that happens, clients requesting the generation count will be told + * it has a generation count of zero and they use that knowledge as a + * hint that the file is changing and it therefore might be prudent to + * wait until it is no longer mapped writable. Clients should *not* + * rely on this behaviour however; we might decide that it's better + * for us to publish the fact that a file is mapped writable via + * alternate means and return the generation counter when it is mapped + * writable as it still has some, albeit limited, use. We reserve the + * right to make this change. + * + * Lastly, it's important to realise that because data and metadata + * take different paths through the system, it's possible upon crash + * or sudden power loss and after a restart, that a change may be + * visible to the rest of the system without a corresponding change to + * the generation counter. The reverse may also be true, but for all + * practical applications this shouldn't be an issue. + */ +void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { + u_int8_t *finfo = NULL; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)attrp->ca_finderinfo; + finfo = finfo + 16; + + /* + * Make sure to write it out as big endian, since that's how + * finder info is defined. + * + * Generation count is only supported for files. 
+ */ + if (S_ISREG(attrp->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->write_gen_counter = OSSwapHostToBigInt32(gencount); + } + + /* If it were neither directory/file, then we'd bail out */ + return; +} + +/* + * Increase the gen count by 1; if it wraps around to 0, increment by + * two. The cnode *must* be locked exclusively by the caller. + * + * You may think holding the lock is unnecessary because we only need + * to change the counter, but consider this sequence of events: thread + * A calls hfs_incr_gencount and the generation counter is 2 upon + * entry. A context switch occurs and thread B increments the counter + * to 3, thread C now gets the generation counter (for whatever + * purpose), and then another thread makes another change and the + * generation counter is incremented again---it's now 4. Now thread A + * continues and it sets the generation counter back to 3. So you can + * see, thread C would miss the change that caused the generation + * counter to increment to 4 and for this reason the cnode *must* + * always be locked exclusively. + */ +uint32_t hfs_incr_gencount (struct cnode *cp) { + u_int8_t *finfo = NULL; + u_int32_t gcount = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)cp->c_finderinfo; + finfo = finfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. + * + * NOTE: the write_gen_counter is stored in the same location in both the + * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the + * last 32-bit word) so it is safe to have one code path here. + */ + if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); + + /* Was it zero to begin with (file originated in 10.8 or earlier?) */ + if (gcount == 0) { + gcount++; + } + + /* now bump it */ + gcount++; + + /* Did it wrap around ? */ + if (gcount == 0) { + gcount++; + } + extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount); + + SET(cp->c_flag, C_MINOR_MOD); + } + else { + gcount = 0; + } + + return gcount; +} + +/* + * There is no need for any locks here (other than an iocount on an + * associated vnode) because reading and writing an aligned 32 bit + * integer should be atomic on all platforms we support. + */ +static u_int32_t +hfs_get_gencount_internal(const uint8_t *finderinfo, mode_t mode) +{ + const uint8_t *finfo = NULL; + u_int32_t gcount = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = finderinfo; + finfo = finfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. + * + * NOTE: the write_gen_counter is stored in the same location in both the + * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the + * last 32-bit word) so it is safe to have one code path here. + */ + if (S_ISDIR(mode) || S_ISREG(mode)) { + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; + gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); + + /* + * Is it zero? File might originate in 10.8 or earlier. We lie and bump it to 1, + * since the incrementer code is able to handle this case and will double-increment + * for us. 
+ */ + if (gcount == 0) { + gcount++; + } + } + + return gcount; +} + +/* Getter for the gen count */ +u_int32_t hfs_get_gencount (struct cnode *cp) { + return hfs_get_gencount_internal(cp->c_finderinfo, cp->c_attr.ca_mode); +} + +/* Getter for the gen count from a buffer (currently pointer to finderinfo)*/ +u_int32_t hfs_get_gencount_from_blob (const uint8_t *finfoblob, mode_t mode) { + return hfs_get_gencount_internal(finfoblob, mode); +} + +void hfs_clear_might_be_dirty_flag(cnode_t *cp) +{ + /* + * If we're about to touch both mtime and ctime, we can clear the + * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that + * subsequent page-outs can only be for data made dirty before + * now. + */ + CLR(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING); +} + +/* + * Touch cnode times based on c_touch_xxx flags + * + * cnode must be locked exclusive + * + * This will also update the volume modify time + */ +void +hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) +{ + vfs_context_t ctx; + + if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) || ISSET(cp->c_flag, C_NOEXISTS)) { + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; + CLR(cp->c_flag, C_NEEDS_DATEADDED); + return; + } +#if CONFIG_HFS_STD + else if (hfsmp->hfs_flags & HFS_STANDARD) { + /* HFS Standard doesn't support access times */ + cp->c_touch_acctime = FALSE; + } +#endif + + ctx = vfs_context_current(); + /* + * Skip access time updates if: + * . MNT_NOATIME is set + * . a file system freeze is in progress + * . a file system resize is in progress + * . the vnode associated with this cnode is marked for rapid aging + */ + if (cp->c_touch_acctime) { + if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) || + hfsmp->hfs_freeze_state != HFS_THAWED || + (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) || + (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) { + + cp->c_touch_acctime = FALSE; + } + } + if (cp->c_touch_acctime || cp->c_touch_chgtime || + cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) { + struct timeval tv; + int touchvol = 0; + + if (cp->c_touch_modtime && cp->c_touch_chgtime) + hfs_clear_might_be_dirty_flag(cp); + + microtime(&tv); + + if (cp->c_touch_acctime) { + /* + * When the access time is the only thing changing, we + * won't necessarily write it to disk immediately. We + * only do the atime update at vnode recycle time, when + * fsync is called or when there's another reason to write + * to the metadata. 
+ */ + cp->c_atime = tv.tv_sec; + cp->c_touch_acctime = FALSE; + } + if (cp->c_touch_modtime) { + cp->c_touch_modtime = FALSE; + time_t new_time = tv.tv_sec; +#if CONFIG_HFS_STD + /* + * HFS dates that WE set must be adjusted for DST + */ + if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { + new_time += 3600; + } +#endif + if (cp->c_mtime != new_time) { + cp->c_mtime = new_time; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } + } + if (cp->c_touch_chgtime) { + cp->c_touch_chgtime = FALSE; + if (cp->c_ctime != tv.tv_sec) { + cp->c_ctime = tv.tv_sec; + cp->c_flag |= C_MINOR_MOD; + touchvol = 1; + } + } + + if (cp->c_flag & C_NEEDS_DATEADDED) { + hfs_write_dateadded (&(cp->c_attr), tv.tv_sec); + cp->c_flag |= C_MINOR_MOD; + /* untwiddle the bit */ + cp->c_flag &= ~C_NEEDS_DATEADDED; + touchvol = 1; + } + + /* Touch the volume modtime if needed */ + if (touchvol) { + hfs_note_header_minor_change(hfsmp); + HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec; + } + } +} + +// Use this if you don't want to check the return code +void hfs_lock_always(cnode_t *cp, enum hfs_locktype locktype) +{ + hfs_lock(cp, locktype, HFS_LOCK_ALWAYS); +} + +/* + * Lock a cnode. + * N.B. If you add any failure cases, *make* sure hfs_lock_always works + */ +int +hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) +{ + thread_t thread = current_thread(); + + if (cp->c_lockowner == thread) { + /* + * Only the extents and bitmap files support lock recursion + * here. The other system files support lock recursion in + * hfs_systemfile_lock. Eventually, we should change to + * handle recursion solely in hfs_systemfile_lock. + */ + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + cp->c_syslockcount++; + } else { + panic("hfs_lock: locking against myself!"); + } + } else if (locktype == HFS_SHARED_LOCK) { + lck_rw_lock_shared(&cp->c_rwlock); + cp->c_lockowner = HFS_SHARED_OWNER; + } else { /* HFS_EXCLUSIVE_LOCK */ + lck_rw_lock_exclusive(&cp->c_rwlock); + cp->c_lockowner = thread; + + /* Only the extents and bitmap files support lock recursion. */ + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + cp->c_syslockcount = 1; + } + } + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * Regular cnodes (non-system files) cannot be locked + * while holding the journal lock or a system file lock. + */ + if (!(cp->c_desc.cd_flags & CD_ISMETA) && + ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) { + vnode_t vp = NULLVP; + + /* Find corresponding vnode. */ + if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) { + vp = cp->c_vp; + } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) { + vp = cp->c_rsrc_vp; + } + if (vp != NULLVP) { + struct hfsmount *hfsmp = VTOHFS(vp); + + if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) { + /* This will eventually be a panic here, but we need + to fix where we create the hot files BTree + first. 
*/ + printf("hfs_lock: bad lock order (cnode after journal)\n"); + } + if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after catalog)"); + } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after attribute)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) { + panic("hfs_lock: bad lock order (cnode after extents)"); + } + } + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + /* + * Skip cnodes for regular files that no longer exist + * (marked deleted, catalog entry gone). + */ + if (((flags & HFS_LOCK_ALLOW_NOEXISTS) == 0) && + ((cp->c_desc.cd_flags & CD_ISMETA) == 0) && + (cp->c_flag & C_NOEXISTS)) { + hfs_unlock(cp); + return (ENOENT); + } + return (0); +} + +bool hfs_lock_upgrade(cnode_t *cp) +{ + if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock)) { + cp->c_lockowner = current_thread(); + return true; + } else + return false; +} + +/* + * Lock a pair of cnodes. + */ +int +hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfs_locktype locktype) +{ + struct cnode *first, *last; + int error; + + /* + * If cnodes match then just lock one. + */ + if (cp1 == cp2) { + return hfs_lock(cp1, locktype, HFS_LOCK_DEFAULT); + } + + /* + * Lock in cnode address order. + */ + if (cp1 < cp2) { + first = cp1; + last = cp2; + } else { + first = cp2; + last = cp1; + } + + if ( (error = hfs_lock(first, locktype, HFS_LOCK_DEFAULT))) { + return (error); + } + if ( (error = hfs_lock(last, locktype, HFS_LOCK_DEFAULT))) { + hfs_unlock(first); + return (error); + } + return (0); +} + +/* + * Check ordering of two cnodes. Return true if they are are in-order. + */ +static int +hfs_isordered(struct cnode *cp1, struct cnode *cp2) +{ + if (cp1 == cp2) + return (0); + if (cp1 == NULL || cp2 == (struct cnode *)0xffffffff) + return (1); + if (cp2 == NULL || cp1 == (struct cnode *)0xffffffff) + return (0); + /* + * Locking order is cnode address order. + */ + return (cp1 < cp2); +} + +/* + * Acquire 4 cnode locks. + * - locked in cnode address order (lesser address first). + * - all or none of the locks are taken + * - only one lock taken per cnode (dup cnodes are skipped) + * - some of the cnode pointers may be null + */ +int +hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, + struct cnode *cp4, enum hfs_locktype locktype, struct cnode **error_cnode) +{ + struct cnode * a[3]; + struct cnode * b[3]; + struct cnode * list[4]; + struct cnode * tmp; + int i, j, k; + int error; + if (error_cnode) { + *error_cnode = NULL; + } + + if (hfs_isordered(cp1, cp2)) { + a[0] = cp1; a[1] = cp2; + } else { + a[0] = cp2; a[1] = cp1; + } + if (hfs_isordered(cp3, cp4)) { + b[0] = cp3; b[1] = cp4; + } else { + b[0] = cp4; b[1] = cp3; + } + a[2] = (struct cnode *)0xffffffff; /* sentinel value */ + b[2] = (struct cnode *)0xffffffff; /* sentinel value */ + + /* + * Build the lock list, skipping over duplicates + */ + for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) { + tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++]; + if (k == 0 || tmp != list[k-1]) + list[k++] = tmp; + } + + /* + * Now we can lock using list[0 - k]. + * Skip over NULL entries. + */ + for (i = 0; i < k; ++i) { + if (list[i]) + if ((error = hfs_lock(list[i], locktype, HFS_LOCK_DEFAULT))) { + /* Only stuff error_cnode if requested */ + if (error_cnode) { + *error_cnode = list[i]; + } + /* Drop any locks we acquired. 
*/ + while (--i >= 0) { + if (list[i]) + hfs_unlock(list[i]); + } + return (error); + } + } + return (0); +} + + +/* + * Unlock a cnode. + */ +void +hfs_unlock(struct cnode *cp) +{ + vnode_t rvp = NULLVP; + vnode_t vp = NULLVP; + u_int32_t c_flag = 0; + + /* + * Only the extents and bitmap file's support lock recursion. + */ + if ((cp->c_fileid == kHFSExtentsFileID) || + (cp->c_fileid == kHFSAllocationFileID)) { + if (--cp->c_syslockcount > 0) { + return; + } + } + + const thread_t thread = current_thread(); + + if (cp->c_lockowner == thread) { + c_flag = cp->c_flag; + + // If we have the truncate lock, we must defer the puts + if (cp->c_truncatelockowner == thread) { + if (ISSET(c_flag, C_NEED_DVNODE_PUT) + && !cp->c_need_dvnode_put_after_truncate_unlock) { + CLR(c_flag, C_NEED_DVNODE_PUT); + cp->c_need_dvnode_put_after_truncate_unlock = true; + } + if (ISSET(c_flag, C_NEED_RVNODE_PUT) + && !cp->c_need_rvnode_put_after_truncate_unlock) { + CLR(c_flag, C_NEED_RVNODE_PUT); + cp->c_need_rvnode_put_after_truncate_unlock = true; + } + } + + CLR(cp->c_flag, (C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE + | C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT)); + + if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) { + vp = cp->c_vp; + } + if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) { + rvp = cp->c_rsrc_vp; + } + + cp->c_lockowner = NULL; + lck_rw_unlock_exclusive(&cp->c_rwlock); + } else { + lck_rw_unlock_shared(&cp->c_rwlock); + } + + /* Perform any vnode post processing after cnode lock is dropped. */ + if (vp) { + if (c_flag & C_NEED_DATA_SETSIZE) { + ubc_setsize(vp, VTOF(vp)->ff_size); +#if HFS_COMPRESSION + /* + * If this is a compressed file, we need to reset the + * compression state. We will have set the size to zero + * above and it will get fixed up later (in exactly the + * same way that new vnodes are fixed up). Note that we + * should only be able to get here if the truncate lock is + * held exclusively and so we do the reset when that's + * unlocked. + */ + decmpfs_cnode *dp = VTOCMP(vp); + if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN) + cp->c_need_decmpfs_reset = true; +#endif + } + if (c_flag & C_NEED_DVNODE_PUT) + vnode_put(vp); + } + if (rvp) { + if (c_flag & C_NEED_RSRC_SETSIZE) + ubc_setsize(rvp, VTOF(rvp)->ff_size); + if (c_flag & C_NEED_RVNODE_PUT) + vnode_put(rvp); + } +} + +/* + * Unlock a pair of cnodes. + */ +void +hfs_unlockpair(struct cnode *cp1, struct cnode *cp2) +{ + hfs_unlock(cp1); + if (cp2 != cp1) + hfs_unlock(cp2); +} + +/* + * Unlock a group of cnodes. + */ +void +hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4) +{ + struct cnode * list[4]; + int i, k = 0; + + if (cp1) { + hfs_unlock(cp1); + list[k++] = cp1; + } + if (cp2) { + for (i = 0; i < k; ++i) { + if (list[i] == cp2) + goto skip1; + } + hfs_unlock(cp2); + list[k++] = cp2; + } +skip1: + if (cp3) { + for (i = 0; i < k; ++i) { + if (list[i] == cp3) + goto skip2; + } + hfs_unlock(cp3); + list[k++] = cp3; + } +skip2: + if (cp4) { + for (i = 0; i < k; ++i) { + if (list[i] == cp4) + return; + } + hfs_unlock(cp4); + } +} + + +/* + * Protect a cnode against a truncation. + * + * Used mainly by read/write since they don't hold the + * cnode lock across calls to the cluster layer. + * + * The process doing a truncation must take the lock + * exclusive. The read/write processes can take it + * shared. The locktype argument is the same as supplied to + * hfs_lock. 
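+ *
+ * Illustrative usage sketch (editorial note, not part of the original source):
+ * a read/write path would typically bracket its calls into the cluster layer
+ * with a shared grab of this lock, e.g.
+ *
+ *	hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+ *	... cluster I/O against the fork ...
+ *	hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ *
+ * while a truncation path takes HFS_EXCLUSIVE_LOCK instead.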
+ */
+void
+hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
+{
+	thread_t thread = current_thread();
+
+	if (cp->c_truncatelockowner == thread) {
+		/*
+		 * Ignore grabbing the lock if the current thread already
+		 * holds exclusive lock.
+		 *
+		 * This is needed on the hfs_vnop_pagein path where we need to ensure
+		 * the file does not change sizes while we are paging in. However,
+		 * we may already hold the lock exclusive due to another
+		 * VNOP from earlier in the call stack. So if we already hold
+		 * the truncate lock exclusive, allow it to proceed, but ONLY if
+		 * it's in the recursive case.
+		 */
+		if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
+			panic("hfs_lock_truncate: cnode %p locked!", cp);
+		}
+	} else if (locktype == HFS_SHARED_LOCK) {
+		lck_rw_lock_shared(&cp->c_truncatelock);
+		cp->c_truncatelockowner = HFS_SHARED_OWNER;
+	} else { /* HFS_EXCLUSIVE_LOCK */
+		lck_rw_lock_exclusive(&cp->c_truncatelock);
+		cp->c_truncatelockowner = thread;
+	}
+}
+
+bool hfs_truncate_lock_upgrade(struct cnode *cp)
+{
+	hfs_assert(cp->c_truncatelockowner == HFS_SHARED_OWNER);
+	if (!lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock))
+		return false;
+	cp->c_truncatelockowner = current_thread();
+	return true;
+}
+
+void hfs_truncate_lock_downgrade(struct cnode *cp)
+{
+	hfs_assert(cp->c_truncatelockowner == current_thread());
+	lck_rw_lock_exclusive_to_shared(&cp->c_truncatelock);
+	cp->c_truncatelockowner = HFS_SHARED_OWNER;
+}
+
+/*
+ * Attempt to get the truncate lock. If it cannot be acquired, error out.
+ * This function is needed in the degenerate hfs_vnop_pagein during force unmount
+ * case. To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
+ * temporarily need to disable V2 semantics.
+ */
+int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
+{
+	thread_t thread = current_thread();
+	boolean_t didlock = false;
+
+	if (cp->c_truncatelockowner == thread) {
+		/*
+		 * Ignore grabbing the lock if the current thread already
+		 * holds exclusive lock.
+		 *
+		 * This is needed on the hfs_vnop_pagein path where we need to ensure
+		 * the file does not change sizes while we are paging in. However,
+		 * we may already hold the lock exclusive due to another
+		 * VNOP from earlier in the call stack. So if we already hold
+		 * the truncate lock exclusive, allow it to proceed, but ONLY if
+		 * it's in the recursive case.
+		 */
+		if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
+			panic("hfs_lock_truncate: cnode %p locked!", cp);
+		}
+	} else if (locktype == HFS_SHARED_LOCK) {
+		didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
+		if (didlock) {
+			cp->c_truncatelockowner = HFS_SHARED_OWNER;
+		}
+	} else { /* HFS_EXCLUSIVE_LOCK */
+		didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
+		if (didlock) {
+			cp->c_truncatelockowner = thread;
+		}
+	}
+
+	return didlock;
+}
+
+
+/*
+ * Unlock the truncate lock, which protects against size changes.
+ *
+ * If HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
+ * hfs_lock_truncate() might have skipped grabbing a lock because
+ * the current thread was already holding the lock exclusive and
+ * we may need to return from this function without actually unlocking
+ * the truncate lock.
+ */ +void +hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags) +{ + thread_t thread = current_thread(); + + /* + * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current + * lock owner of the truncate lock is our current thread, then + * we must have skipped taking the lock earlier by in + * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the + * flags (as the current thread was current lock owner). + * + * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then + * we check the lockowner field to infer whether the lock was taken + * exclusively or shared in order to know what underlying lock + * routine to call. + */ + if (flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) { + if (cp->c_truncatelockowner == thread) { + return; + } + } + + /* HFS_LOCK_EXCLUSIVE */ + if (thread == cp->c_truncatelockowner) { + vnode_t vp = NULL, rvp = NULL; + + /* + * If there are pending set sizes, the cnode lock should be dropped + * first. + */ + hfs_assert(!(cp->c_lockowner == thread + && ISSET(cp->c_flag, C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE))); + + if (cp->c_need_dvnode_put_after_truncate_unlock) { + vp = cp->c_vp; + cp->c_need_dvnode_put_after_truncate_unlock = false; + } + if (cp->c_need_rvnode_put_after_truncate_unlock) { + rvp = cp->c_rsrc_vp; + cp->c_need_rvnode_put_after_truncate_unlock = false; + } + +#if HFS_COMPRESSION + bool reset_decmpfs = cp->c_need_decmpfs_reset; + cp->c_need_decmpfs_reset = false; +#endif + + cp->c_truncatelockowner = NULL; + lck_rw_unlock_exclusive(&cp->c_truncatelock); + +#if HFS_COMPRESSION + if (reset_decmpfs) { + decmpfs_cnode *dp = cp->c_decmp; + if (dp && decmpfs_cnode_get_vnode_state(dp) != FILE_TYPE_UNKNOWN) + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + + // Do the puts now + if (vp) + vnode_put(vp); + if (rvp) + vnode_put(rvp); + } else { /* HFS_LOCK_SHARED */ + lck_rw_unlock_shared(&cp->c_truncatelock); + } +} diff --git a/core/hfs_cnode.h b/core/hfs_cnode.h new file mode 100644 index 0000000..088c445 --- /dev/null +++ b/core/hfs_cnode.h @@ -0,0 +1,630 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _HFS_CNODE_H_ +#define _HFS_CNODE_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include +#include +#include +#include +#include +#include +#if HFS_COMPRESSION +#include +#endif +#if CONFIG_PROTECT +#include +#endif +#include + +#include "hfs_catalog.h" +#include "rangelist.h" +#include "hfs_dbg.h" + +/* + * The filefork is used to represent an HFS file fork (data or resource). + * Reading or writing any of these fields requires holding cnode lock. + */ +struct filefork { + struct cnode *ff_cp; /* cnode associated with this fork */ + struct rl_head ff_invalidranges; /* Areas of disk that should read back as zeroes */ + union { + void *ffu_sysfileinfo; /* additional info for system files */ + char *ffu_symlinkptr; /* symbolic link pathname */ + } ff_union; + struct cat_fork ff_data; /* fork data (size, extents) */ +}; +typedef struct filefork filefork_t; + + +#define HFS_TEMPLOOKUP_NAMELEN 32 + +/* + * Catalog Lookup struct (runtime) + * + * This is used so that when we need to malloc a container for a catalog + * lookup operation, we can acquire memory for everything in one fell swoop + * as opposed to putting many of these objects on the stack. The cat_fork + * data structure can take up 100+bytes easily, and that can add to stack + * overhead. + * + * As a result, we use this to easily pass around the memory needed for a + * lookup operation. + */ +struct cat_lookup_buffer { + struct cat_desc lookup_desc; + struct cat_attr lookup_attr; + struct filefork lookup_fork; + struct componentname lookup_cn; + char lookup_name[HFS_TEMPLOOKUP_NAMELEN]; /* for open-unlinked paths only */ +}; + + +/* Aliases for common fields */ +#define ff_size ff_data.cf_size +#define ff_new_size ff_data.cf_new_size +#define ff_clumpsize ff_data.cf_clump +#define ff_bytesread ff_data.cf_bytesread +#define ff_extents ff_data.cf_extents + +/* + * Note that the blocks fields are protected by the cnode lock, *not* + * the truncate lock. + */ +#define ff_blocks ff_data.cf_blocks +#define ff_unallocblocks ff_data.cf_vblocks +static inline uint32_t ff_allocblocks(filefork_t *ff) +{ + hfs_assert(ff->ff_blocks >= ff->ff_unallocblocks); + return ff->ff_blocks - ff->ff_unallocblocks; +} + +#define ff_symlinkptr ff_union.ffu_symlinkptr +#define ff_sysfileinfo ff_union.ffu_sysfileinfo + + +/* The btree code still needs these... */ +#define fcbEOF ff_size +#define fcbExtents ff_extents +#define fcbBTCBPtr ff_sysfileinfo + +typedef u_int8_t atomicflag_t; + + +/* + * Hardlink Origin (for hardlinked directories). + */ +struct linkorigin { + TAILQ_ENTRY(linkorigin) lo_link; /* chain */ + void * lo_thread; /* thread that performed the lookup */ + cnid_t lo_cnid; /* hardlink's cnid */ + cnid_t lo_parentcnid; /* hardlink's parent cnid */ +}; +typedef struct linkorigin linkorigin_t; + +#define MAX_CACHED_ORIGINS 10 +#define MAX_CACHED_FILE_ORIGINS 8 + +/* + * The cnode is used to represent each active (or recently active) + * file or directory in the HFS filesystem. + * + * Reading or writing any of these fields requires holding c_lock. 
+ */ +struct cnode { + lck_rw_t c_rwlock; /* cnode's lock */ + thread_t c_lockowner; /* cnode's lock owner (exclusive case only) */ + lck_rw_t c_truncatelock; /* protects file from truncation during read/write */ + thread_t c_truncatelockowner; /* truncate lock owner (exclusive case only) */ + LIST_ENTRY(cnode) c_hash; /* cnode's hash chain */ + u_int32_t c_flag; /* cnode's runtime flags */ + u_int32_t c_hflag; /* cnode's flags for maintaining hash - protected by global hash lock */ + struct vnode *c_vp; /* vnode for data fork or dir */ + struct vnode *c_rsrc_vp; /* vnode for resource fork */ + struct dquot *c_dquot[MAXQUOTAS]; /* cnode's quota info */ + u_int32_t c_childhint; /* catalog hint for children (small dirs only) */ + u_int32_t c_dirthreadhint; /* catalog hint for directory's thread rec */ + struct cat_desc c_desc; /* cnode's descriptor */ + struct cat_attr c_attr; /* cnode's attributes */ + TAILQ_HEAD(hfs_originhead, linkorigin) c_originlist; /* hardlink origin cache */ + TAILQ_HEAD(hfs_hinthead, directoryhint) c_hintlist; /* readdir directory hint list */ + int16_t c_dirhinttag; /* directory hint tag */ + union { + int16_t cu_dirhintcnt; /* directory hint count */ + int16_t cu_syslockcount; /* system file use only */ + } c_union; + u_int32_t c_dirchangecnt; /* changes each insert/delete (in-core only) */ + struct filefork *c_datafork; /* cnode's data fork */ + struct filefork *c_rsrcfork; /* cnode's rsrc fork */ + atomicflag_t c_touch_acctime; + atomicflag_t c_touch_chgtime; + atomicflag_t c_touch_modtime; + + // The following flags are protected by the truncate lock + union { + struct { + bool c_need_dvnode_put_after_truncate_unlock : 1; + bool c_need_rvnode_put_after_truncate_unlock : 1; +#if HFS_COMPRESSION + bool c_need_decmpfs_reset : 1; +#endif + }; + uint8_t c_tflags; + }; + + /* + * Where we're using a journal, we keep track of the last + * transaction that we did an update in. If a minor modification + * is made, we'll still push it if we're still on the same + * transaction. + */ + uint32_t c_update_txn; + +#if HFS_COMPRESSION + struct decmpfs_cnode *c_decmp; +#endif /* HFS_COMPRESSION */ +#if CONFIG_PROTECT + struct cprotect *c_cpentry; /* content protection data */ +#endif + +#if HFS_MALLOC_DEBUG + // N.B. 
— *must* always be last + uint64_t magic; +#endif +}; +typedef struct cnode cnode_t; + +/* Aliases for common cnode fields */ +#define c_cnid c_desc.cd_cnid +#define c_hint c_desc.cd_hint +#define c_parentcnid c_desc.cd_parentcnid +#define c_encoding c_desc.cd_encoding + +#define c_fileid c_attr.ca_fileid +#define c_mode c_attr.ca_mode +#define c_linkcount c_attr.ca_linkcount +#define c_uid c_attr.ca_uid +#define c_gid c_attr.ca_gid +#define c_rdev c_attr.ca_union1.cau_rdev +#define c_atime c_attr.ca_atime +#define c_mtime c_attr.ca_mtime +#define c_ctime c_attr.ca_ctime +#define c_itime c_attr.ca_itime +#define c_btime c_attr.ca_btime +#define c_bsdflags c_attr.ca_flags +#define c_finderinfo c_attr.ca_finderinfo +#define c_blocks c_attr.ca_union2.cau_blocks +#define c_entries c_attr.ca_union2.cau_entries +#define c_zftimeout c_childhint + +#define c_dirhintcnt c_union.cu_dirhintcnt +#define c_syslockcount c_union.cu_syslockcount + + +/* hash maintenance flags kept in c_hflag and protected by hfs_chash_mutex */ +#define H_ALLOC 0x00001 /* CNode is being allocated */ +#define H_ATTACH 0x00002 /* CNode is being attached to by another vnode */ +#define H_TRANSIT 0x00004 /* CNode is getting recycled */ +#define H_WAITING 0x00008 /* CNode is being waited for */ + + +/* + * Runtime cnode flags (kept in c_flag) + */ +#define C_NEED_RVNODE_PUT 0x0000001 /* Need to do a vnode_put on c_rsrc_vp after the unlock */ +#define C_NEED_DVNODE_PUT 0x0000002 /* Need to do a vnode_put on c_vp after the unlock */ +#define C_ZFWANTSYNC 0x0000004 /* fsync requested and file has holes */ +#define C_FROMSYNC 0x0000008 /* fsync was called from sync */ + +#define C_MODIFIED 0x0000010 /* CNode has been modified */ +#define C_NOEXISTS 0x0000020 /* CNode has been deleted, catalog entry is gone */ +#define C_DELETED 0x0000040 /* CNode has been marked to be deleted */ +#define C_HARDLINK 0x0000080 /* CNode is a hard link (file or dir) */ + +/* + * A minor modification is one where the volume would not be inconsistent if + * the change was not pushed to disk. For example, changes to times. + */ +#define C_MINOR_MOD 0x0000100 /* CNode has a minor modification */ + +#define C_HASXATTRS 0x0000200 /* cnode has extended attributes */ +#define C_NEG_ENTRIES 0x0000400 /* directory has negative name entries */ +/* + * For C_SSD_STATIC: SSDs may want to deal with the file payload data in a + * different manner knowing that the content is not likely to be modified. This is + * purely advisory at the HFS level, and is not maintained after the cnode goes out of core. + */ +#define C_SSD_STATIC 0x0000800 /* Assume future writes contain static content */ + +#define C_NEED_DATA_SETSIZE 0x0001000 /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */ +#define C_NEED_RSRC_SETSIZE 0x0002000 /* Do a ubc_setsize(0) on c_vp after the unlock */ +#define C_DIR_MODIFICATION 0x0004000 /* Directory is being modified, wait for lookups */ +#define C_ALWAYS_ZEROFILL 0x0008000 /* Always zero-fill the file on an fsync */ + +#define C_RENAMED 0x0010000 /* cnode was deleted as part of rename; C_DELETED should also be set */ +#define C_NEEDS_DATEADDED 0x0020000 /* cnode needs date-added written to the finderinfo bit */ +#define C_BACKINGSTORE 0x0040000 /* cnode is a backing store for an existing or currently-mounting filesystem */ + +/* + * This flag indicates the cnode might be dirty because it + * was mapped writable so if we get any page-outs, update + * the modification and change times. 
+ */ +#define C_MIGHT_BE_DIRTY_FROM_MAPPING 0x0080000 + +/* + * For C_SSD_GREEDY_MODE: SSDs may want to write the file payload data using the greedy mode knowing + * that the content needs to be written out to the disk quicker than normal at the expense of storage efficiency. + * This is purely advisory at the HFS level, and is not maintained after the cnode goes out of core. + */ +#define C_SSD_GREEDY_MODE 0x0100000 /* Assume future writes are recommended to be written in SLC mode */ + +/* 0x0200000 is currently unused */ + +#define C_IO_ISOCHRONOUS 0x0400000 /* device-specific isochronous throughput I/O */ + +#define ZFTIMELIMIT (5 * 60) + +/* + * The following is the "invisible" bit from the fdFlags field + * in the FndrFileInfo. + */ +enum { kFinderInvisibleMask = 1 << 14 }; + + +/* + * Convert between cnode pointers and vnode pointers + */ +#define VTOC(vp) ((struct cnode *)vnode_fsnode((vp))) + +#define CTOV(cp,rsrc) (((rsrc) && S_ISREG((cp)->c_mode)) ? \ + (cp)->c_rsrc_vp : (cp)->c_vp) + +/* + * Convert between vnode pointers and file forks + * + * Note: no CTOF since that is ambiguous + */ + +#define FTOC(fp) ((fp)->ff_cp) + +#define VTOF(vp) ((vp) == VTOC((vp))->c_rsrc_vp ? \ + VTOC((vp))->c_rsrcfork : \ + VTOC((vp))->c_datafork) + +#define VCTOF(vp, cp) ((vp) == (cp)->c_rsrc_vp ? \ + (cp)->c_rsrcfork : \ + (cp)->c_datafork) + +#define FTOV(fp) ((fp) == FTOC(fp)->c_rsrcfork ? \ + FTOC(fp)->c_rsrc_vp : \ + FTOC(fp)->c_vp) + +/* + * This is a helper function used for determining whether or not a cnode has become open + * unlinked in between the time we acquired its vnode and the time we acquire the cnode lock + * to start manipulating it. Due to the SMP nature of VFS, it is probably necessary to + * use this macro every time we acquire a cnode lock, as the content of the Cnode may have + * been modified in between the lookup and a VNOP. Whether or not to call this is dependent + * upon the VNOP in question. Sometimes it is OK to use an open-unlinked file, for example, when + * reading. But other times, such as on the source of a VNOP_RENAME, it should be disallowed. + */ +int hfs_checkdeleted(struct cnode *cp); + +/* + * Test for a resource fork + */ +#define FORK_IS_RSRC(fp) ((fp) == FTOC(fp)->c_rsrcfork) + +#define VNODE_IS_RSRC(vp) ((vp) == VTOC((vp))->c_rsrc_vp) + +#if HFS_COMPRESSION +/* + * VTOCMP(vp) returns a pointer to vp's decmpfs_cnode; this could be NULL + * if the file is not compressed or if hfs_file_is_compressed() hasn't + * yet been called on this file. + */ +#define VTOCMP(vp) (VTOC((vp))->c_decmp) +int hfs_file_is_compressed(struct cnode *cp, int skiplock); +int hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock); +int hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock); +int hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock); +#endif + +#define ATIME_ONDISK_ACCURACY 300 + +static inline bool hfs_should_save_atime(cnode_t *cp) +{ + /* + * We only write atime updates to disk if the delta is greater + * than ATIME_ONDISK_ACCURACY.
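+ * For example, with ATIME_ONDISK_ACCURACY at 300 seconds: if ca_atimeondisk
+ * holds time T and the in-core c_atime has only advanced to T + 120, the
+ * access is not considered worth flushing; once c_atime is more than 300
+ * seconds ahead of the on-disk copy, it is.  The first clause of the test
+ * below also forces a save whenever c_atime has moved backwards relative to
+ * ca_atimeondisk (e.g. the access time was explicitly set to an earlier
+ * value).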
+ */ + return (cp->c_atime < cp->c_attr.ca_atimeondisk + || cp->c_atime - cp->c_attr.ca_atimeondisk > ATIME_ONDISK_ACCURACY); +} + +typedef enum { + HFS_NOT_DIRTY = 0, + HFS_DIRTY = 1, + HFS_DIRTY_ATIME = 2 +} hfs_dirty_t; + +static inline hfs_dirty_t hfs_is_dirty(cnode_t *cp) +{ + if (ISSET(cp->c_flag, C_NOEXISTS)) + return HFS_NOT_DIRTY; + + if (ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED) + || cp->c_touch_chgtime || cp->c_touch_modtime) { + return HFS_DIRTY; + } + + if (cp->c_touch_acctime || hfs_should_save_atime(cp)) + return HFS_DIRTY_ATIME; + + return HFS_NOT_DIRTY; +} + +/* This overlays the FileID portion of NFS file handles. */ +struct hfsfid { + u_int32_t hfsfid_cnid; /* Catalog node ID. */ + u_int32_t hfsfid_gen; /* Generation number (create date). */ +}; + + +/* Get new default vnode */ +extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, + struct cat_desc *descp, int flags, struct cat_attr *attrp, + struct cat_fork *forkp, struct vnode **vpp, int *out_flags); + +/* Input flags for hfs_getnewvnode */ + +#define GNV_WANTRSRC 0x01 /* Request the resource fork vnode. */ +#define GNV_SKIPLOCK 0x02 /* Skip taking the cnode lock (when getting resource fork). */ +#define GNV_CREATE 0x04 /* The vnode is for a newly created item. */ +#define GNV_NOCACHE 0x08 /* Delay entering this item in the name cache */ +#define GNV_USE_VP 0x10 /* Use the vnode provided in *vpp instead of creating a new one */ + +/* Output flags for hfs_getnewvnode */ +#define GNV_CHASH_RENAMED 0x01 /* The cnode was renamed in-flight */ +#define GNV_CAT_DELETED 0x02 /* The cnode was deleted from the catalog */ +#define GNV_NEW_CNODE 0x04 /* We are vending out a newly initialized cnode */ +#define GNV_CAT_ATTRCHANGED 0x08 /* Something in struct cat_attr changed in between cat_lookups */ + + +/* Touch cnode times based on c_touch_xxx flags */ +extern void hfs_touchtimes(struct hfsmount *, struct cnode *); +extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded); +extern u_int32_t hfs_get_dateadded (struct cnode *cp); +extern u_int32_t hfs_get_dateadded_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); + +/* Gen counter methods */ +extern void hfs_write_gencount(struct cat_attr *cattrp, uint32_t gencount); +extern uint32_t hfs_get_gencount(struct cnode *cp); +extern uint32_t hfs_incr_gencount (struct cnode *cp); +extern uint32_t hfs_get_gencount_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); + +/* Document id methods */ +extern uint32_t hfs_get_document_id(struct cnode * /* cp */); +extern uint32_t hfs_get_document_id_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */); + +/* Zero-fill file and push regions out to disk */ +enum { + // Use this flag if you're going to sync later + HFS_FILE_DONE_NO_SYNC = 1, +}; +typedef uint32_t hfs_file_done_opts_t; +extern int hfs_filedone(struct vnode *vp, vfs_context_t context, + hfs_file_done_opts_t opts); + +/* + * HFS cnode hash functions. 
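+ *
+ * These functions maintain the per-mount hash that maps file IDs to in-core
+ * cnodes.  As a hedged usage sketch (argument values are illustrative, not
+ * taken from a real call site), looking up an already-resident data-fork
+ * vnode for a given file ID could look like:
+ *
+ *     struct vnode *vp = hfs_chash_getvnode(hfsmp, fileid,
+ *                                           0,    // wantrsrc
+ *                                           0,    // skiplock
+ *                                           0);   // allow_deleted
+ *
+ * which is expected to return NULL when no matching cnode is in the hash; in
+ * that case the caller typically falls back to hfs_getnewvnode(), which uses
+ * hfs_chash_getcnode() to find or create the cnode (marked H_ALLOC while the
+ * allocation is in flight).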
+ */ +extern void hfs_chashinit(void); +extern void hfs_chashinit_finish(struct hfsmount *hfsmp); +extern void hfs_delete_chash(struct hfsmount *hfsmp); +extern int hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp); +extern void hfs_chash_abort(struct hfsmount *hfsmp, struct cnode *cp); +extern void hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2); +extern void hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int flags); +extern void hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp); + +extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, + int skiplock, int allow_deleted); +extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, + int wantrsrc, int skiplock, int *out_flags, int *hflags); +extern int hfs_chash_snoop(struct hfsmount *, ino_t, int, int (*)(const cnode_t *, void *), void *); +extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, + cnid_t cnid, struct cat_attr *cattr, int *error); + +extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); + +/* + * HFS cnode lock functions. + * + * HFS Locking Order: + * + * 1. cnode truncate lock (if needed) -- see below for more on this + * + * + hfs_vnop_pagein/out handles recursive use of this lock (by + * using flag option HFS_LOCK_SKIP_IF_EXCLUSIVE) although there + * are issues with this (see #16620278). + * + * + If locking multiple cnodes then the truncate lock must be taken on + * all (in address order), before taking the cnode locks. + * + * 2. Hot Files stage mutex (grabbed before manipulating individual vnodes/cnodes) + * + * 3. cnode locks in address order (if needed) + * + * 4. journal (if needed) + * + * 5. Hot Files B-Tree lock (not treated as a system file) + * + * 6. system files (as needed) + * + * A. Catalog B-tree file + * B. Attributes B-tree file + * C. Startup file (if there is one) + * D. Allocation Bitmap file (always exclusive, supports recursion) + * E. Overflow Extents B-tree file (always exclusive, supports recursion) + * + * 7. hfs mount point (always last) + * + * + * I. HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last) + */ + +/* + * -- The Truncate Lock -- + * + * The truncate lock is used for a few purposes (more than its name + * might suggest). The first thing to note is that the cnode lock + * cannot be held whilst issuing any I/O other than metadata changes, + * so the truncate lock, in either shared or exclusive form, must + * usually be held in these cases. This includes calls to ubc_setsize + * where the new size is less than the current size known to the VM + * subsystem (for two reasons: a) because reaping pages can block + * (e.g. on pages that are busy or being cleaned); b) reaping pages + * might require page-in for tasks that have that region mapped + * privately). The same applies to other calls into the VM subsystem. + * + * Here are some (but not necessarily all) cases that the truncate + * lock protects for: + * + * + When reading and writing a file, we hold the truncate lock + * shared to ensure that the underlying blocks cannot be deleted + * and on systems that use content protection, this also ensures + * the keys remain valid (which might be being used by the + * underlying layers). + * + * + We need to protect against the following sequence of events: + * + * A file is initially size X. A thread issues an append to that + * file. 
Another thread truncates the file and then extends it + * to a new size Y. Now the append can be applied at offset X + * and then the data is lost when the file is truncated; or it + * could be applied after the truncate, i.e. at offset 0; or it + * can be applied at offset Y. What we *cannot* do is apply the + * append at offset X and for the data to be visible at the end. + * (Note that we are free to choose when we apply the append + * operation.) + * + * To solve this, we keep things simple and take the truncate lock + * exclusively in order to sequence the append with other size + * changes. Therefore any size change must take the truncate lock + * exclusively. + * + * (N.B. we could do better and allow readers to run concurrently + * during the append and other size changes.) + * + * So here are the rules: + * + * + If you plan to change ff_size, you must take the truncate lock + * exclusively, *but* be careful what I/O you do whilst you have + * the truncate lock exclusively and try and avoid it if you can: + * if the VM subsystem tries to do something with some pages on a + * different thread and you try and do some I/O with those same + * pages, we will deadlock. (See #16620278.) + * + * + If you do anything that requires blocks to not be deleted or + * encryption keys to remain valid, you must take the truncate lock + * shared. + * + * + And it follows, therefore, that if you want to delete blocks or + * delete keys, you must take the truncate lock exclusively. Note + * that for asynchronous writes, the truncate lock will be dropped + * after issuing I/O but before the I/O has completed, which means + * that before manipulating keys, you *must* issue + * vnode_wait_for_writes in addition to holding the truncate lock. + * + * N.B. ff_size is actually protected by the cnode lock and so you + * must hold the cnode lock exclusively to change it and shared to + * read it.
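+ *
+ * As a hedged illustration of these rules (not an excerpt of any particular
+ * VNOP; error handling and the actual I/O are elided), a read-style path
+ * that needs blocks and keys to stay valid, but does not change ff_size,
+ * would follow the ordering like this:
+ *
+ *     hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+ *     error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
+ *     // ... examine ff_size and set up the transfer under the cnode lock ...
+ *     hfs_unlock(cp);             // cnode lock must be dropped before I/O
+ *     // ... issue the I/O while still holding the truncate lock shared ...
+ *     hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+ *
+ * Anything that will change ff_size (truncate, extend, append) instead takes
+ * the truncate lock with HFS_EXCLUSIVE_LOCK, and holds the cnode lock
+ * exclusively around the ff_size update itself.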
+ * + */ + +enum hfs_locktype { + HFS_SHARED_LOCK = 1, + HFS_EXCLUSIVE_LOCK = 2 +}; + +/* Option flags for cnode and truncate lock functions */ +enum hfs_lockflags { + HFS_LOCK_DEFAULT = 0x0, /* Default flag, no options provided */ + HFS_LOCK_ALLOW_NOEXISTS = 0x1, /* Allow locking of all cnodes, including cnode marked deleted with no catalog entry */ + HFS_LOCK_SKIP_IF_EXCLUSIVE = 0x2, /* Skip locking if the current thread already holds the lock exclusive */ + + // Used when you do not want to check return from hfs_lock + HFS_LOCK_ALWAYS = HFS_LOCK_ALLOW_NOEXISTS, +}; +#define HFS_SHARED_OWNER (void *)0xffffffff + +void hfs_lock_always(cnode_t *cnode, enum hfs_locktype); +int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +bool hfs_lock_upgrade(cnode_t *cp); +int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype); +int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, + enum hfs_locktype, struct cnode **); +void hfs_unlock(struct cnode *); +void hfs_unlockpair(struct cnode *, struct cnode *); +void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *); + +void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +bool hfs_truncate_lock_upgrade(struct cnode *cp); +void hfs_truncate_lock_downgrade(struct cnode *cp); +void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags); +int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); + +extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype); +extern void hfs_systemfile_unlock(struct hfsmount *, int); + +void hfs_clear_might_be_dirty_flag(cnode_t *cp); + +int hfs_set_bsd_flags(struct hfsmount *, struct cnode *, + u_int32_t, u_int32_t, vfs_context_t, int *); +bool hfs_is_journal_file(struct hfsmount *, struct cnode *); + +// cnode must be locked +static inline __attribute__((pure)) +bool hfs_has_rsrc(const cnode_t *cp) +{ + if (cp->c_rsrcfork) + return cp->c_rsrcfork->ff_blocks > 0; + else + return cp->c_datafork && cp->c_blocks > cp->c_datafork->ff_blocks; +} + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ + +#endif /* ! _HFS_CNODE_H_ */ diff --git a/core/hfs_cprotect.c b/core/hfs_cprotect.c new file mode 100644 index 0000000..3fa485b --- /dev/null +++ b/core/hfs_cprotect.c @@ -0,0 +1,2773 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#if CONFIG_PROTECT + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_cnode.h" +#include "hfs_fsctl.h" +#include "hfs_cprotect.h" +#include "hfs_iokit.h" + +#if HFS_CONFIG_KEY_ROLL +#include "hfs_key_roll.h" +#endif + +#define PTR_ADD(type, base, offset) (type)((uintptr_t)(base) + (offset)) + +extern int (**hfs_vnodeop_p) (void *); + +/* + * CP private functions + */ +static int cp_root_major_vers(mount_t mp); +static int cp_getxattr(cnode_t *, struct hfsmount *hfsmp, struct cprotect **); +static void cp_entry_dealloc(hfsmount_t *hfsmp, struct cprotect *entry); +static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp, struct cnode *); +static int cp_lock_vnode_callback(vnode_t, void *); +static int cp_vnode_is_eligible (vnode_t); +static int cp_check_access (cnode_t *cp, struct hfsmount *hfsmp, int vnop); +static int cp_unwrap(struct hfsmount *, struct cprotect *, struct cnode *); +static void cp_init_access(aks_cred_t access, struct cnode *cp); + +// -- cp_key_pair accessors -- + +void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len, + uint16_t max_cached_key_len) +{ + cpkp->cpkp_max_pers_key_len = max_pers_key_len; + cpkp->cpkp_pers_key_len = 0; + cpx_init(cpkp_cpx(cpkp), max_cached_key_len); + + // Default to using offsets + cpx_set_use_offset_for_iv(cpkp_cpx(cpkp), true); +} + +uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_max_pers_key_len; +} + +uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_pers_key_len; +} + +static bool cpkp_has_pers_key(const cp_key_pair_t *cpkp) +{ + return cpkp->cpkp_pers_key_len > 0; +} + +static void *cpkp_pers_key(const cp_key_pair_t *cpkp) +{ + return PTR_ADD(void *, &cpkp->cpkp_cpx, cpx_sizex(cpkp_cpx(cpkp))); +} + +static void cpkp_set_pers_key_len(cp_key_pair_t *cpkp, uint16_t key_len) +{ + if (key_len > cpkp->cpkp_max_pers_key_len) + panic("hfs_cprotect: key too big!"); + cpkp->cpkp_pers_key_len = key_len; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +cpx_t cpkp_cpx(const cp_key_pair_t *cpkp) +{ + // Cast to remove const qualifier + return (cpx_t)&cpkp->cpkp_cpx; +} +#pragma clang diagnostic pop + +size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len) +{ + return sizeof(cp_key_pair_t) + pers_key_len + cpx_size(cached_key_len); +} + +size_t cpkp_sizex(const cp_key_pair_t *cpkp) +{ + return cpkp_size(cpkp->cpkp_max_pers_key_len, cpx_max_key_len(cpkp_cpx(cpkp))); +} + +void cpkp_flush(cp_key_pair_t *cpkp) +{ + cpx_flush(cpkp_cpx(cpkp)); + cpkp->cpkp_pers_key_len = 0; + bzero(cpkp_pers_key(cpkp), cpkp->cpkp_max_pers_key_len); +} + +bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t *dst) +{ + return (cpkp_pers_key_len(src) <= dst->cpkp_max_pers_key_len + && cpx_can_copy(cpkp_cpx(src), cpkp_cpx(dst))); +} + +void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst) +{ + const uint16_t key_len = cpkp_pers_key_len(src); + cpkp_set_pers_key_len(dst, key_len); + memcpy(cpkp_pers_key(dst), cpkp_pers_key(src), key_len); + cpx_copy(cpkp_cpx(src), cpkp_cpx(dst)); +} + +// -- + +bool cp_is_supported_version(uint16_t vers) +{ + return vers == CP_VERS_4 || vers == CP_VERS_5; +} + +/* + * Return the appropriate key and, if requested, the physical offset and 
+ * maximum length for a particular I/O operation. + */ +void cp_io_params(__unused hfsmount_t *hfsmp, cprotect_t cpr, + __unused off_rsrc_t off_rsrc, + __unused int direction, cp_io_params_t *io_params) +{ +#if HFS_CONFIG_KEY_ROLL + hfs_cp_key_roll_ctx_t *ckr = cpr->cp_key_roll_ctx; + + if (ckr && off_rsrc < ckr->ckr_off_rsrc) { + /* + * When we're in the process of rolling an extent, ckr_off_rsrc will + * indicate the end of the extent. + */ + const off_rsrc_t roll_loc = ckr->ckr_off_rsrc + - hfs_blk_to_bytes(ckr->ckr_roll_extent.blockCount, + hfsmp->blockSize); + + if (off_rsrc < roll_loc) { + io_params->max_len = roll_loc - off_rsrc; + io_params->phys_offset = -1; + } else { + /* + * We should never get reads to the extent we're rolling + * because the pages should be locked in the UBC. If we + * did get reads it's not obvious what the right thing to + * do is either: we could read from the old location, but + * we might have written later data to the new location, + * or we could read from the new location, but data might + * not have been written there yet. + * + * Note that whilst raw encrypted reads don't lock any + * pages, or take a cluster_read_direct lock, the call to + * hfs_key_roll_up_to in hfs_vnop_read will have ensured + * that the file has been rolled beyond the offset being + * read so this path should never be taken in that case. + */ + hfs_assert(direction == VNODE_WRITE); + + // For release builds, just in case... + if (direction == VNODE_READ) { + // Use the old key and offset + goto old_key; + } + + io_params->max_len = ckr->ckr_off_rsrc - off_rsrc; + io_params->phys_offset = hfs_blk_to_bytes(ckr->ckr_roll_extent.startBlock, + hfsmp->blockSize) + off_rsrc - roll_loc; + } + + // Use new key + io_params->cpx = cpkp_cpx(&ckr->ckr_keys); + return; + } +old_key: + // Use old key... +#endif + + io_params->max_len = INT64_MAX; + io_params->phys_offset = -1; + io_params->cpx = cpkp_cpx(&cpr->cp_keys); +} + +static void cp_flush_cached_keys(cprotect_t cpr) +{ + cpx_flush(cpkp_cpx(&cpr->cp_keys)); +#if HFS_CONFIG_KEY_ROLL + if (cpr->cp_key_roll_ctx) + cpx_flush(cpkp_cpx(&cpr->cp_key_roll_ctx->ckr_keys)); +#endif +} + +static bool cp_needs_pers_key(cprotect_t cpr) +{ + if (CP_CLASS(cpr->cp_pclass) == PROTECTION_CLASS_F) + return !cpx_has_key(cpkp_cpx(&cpr->cp_keys)); + else + return !cpkp_has_pers_key(&cpr->cp_keys); +} + +static cp_key_revision_t cp_initial_key_revision(__unused hfsmount_t *hfsmp) +{ + return 1; +} + +cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev) +{ + rev = (rev + 0x0100) ^ (mach_absolute_time() & 0xff); + if (!rev) + rev = 1; + return rev; +} + +/* + * Allocate and initialize a cprotect blob for a new cnode. + * Called from hfs_getnewvnode: cnode is locked exclusive. + * + * Read xattr data off the cnode. Then, if conditions permit, + * unwrap the file key and cache it in the cprotect blob. + */ +int +cp_entry_init(struct cnode *cp, struct mount *mp) +{ + struct cprotect *entry = NULL; + int error = 0; + struct hfsmount *hfsmp = VFSTOHFS(mp); + + /* + * The cnode should be locked at this point, regardless of whether or not + * we are creating a new item in the namespace or vending a vnode on behalf + * of lookup. The only time we tell getnewvnode to skip the lock is when + * constructing a resource fork vnode. But a resource fork vnode must come + * after the regular data fork cnode has already been constructed. 
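+ *
+ * A hedged, simplified sketch of the expected calling pattern (the real call
+ * site is hfs_getnewvnode, which already holds the lock; "mp" is the mount
+ * the cnode lives on):
+ *
+ *     hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
+ *     error = cp_entry_init(cp, mp);
+ *     hfs_unlock(cp);
+ *
+ * On a mount without MNT_CPROTECT, or for anything other than a regular file
+ * or directory, this leaves cp->c_cpentry NULL and returns 0; on failure the
+ * partially built entry is destroyed and c_cpentry is likewise left NULL.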
+ */ + if (!cp_fs_protected (mp)) { + cp->c_cpentry = NULL; + return 0; + } + + if (!S_ISREG(cp->c_mode) && !S_ISDIR(cp->c_mode)) { + cp->c_cpentry = NULL; + return 0; + } + + if (hfsmp->hfs_running_cp_major_vers == 0) { + panic ("hfs cp: no running mount point version! "); + } + + hfs_assert(cp->c_cpentry == NULL); + + error = cp_getxattr(cp, hfsmp, &entry); + if (error == ENOATTR) { + /* + * Normally, we should always have a CP EA for a file or directory that + * we are initializing here. However, there are some extenuating circumstances, + * such as the root directory immediately following a newfs_hfs. + * + * As a result, we leave code here to deal with an ENOATTR which will always + * default to a 'D/NONE' key, though we don't expect to use it much. + */ + cp_key_class_t target_class = PROTECTION_CLASS_D; + + if (S_ISDIR(cp->c_mode)) { + target_class = PROTECTION_CLASS_DIR_NONE; + } + + cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp); + + /* allow keybag to override our class preferences */ + error = cp_new (&target_class, hfsmp, cp, cp->c_mode, CP_KEYWRAP_DIFFCLASS, + key_revision, (cp_new_alloc_fn)cp_entry_alloc, (void **)&entry); + if (error == 0) { + entry->cp_pclass = target_class; + entry->cp_key_os_version = cp_os_version(); + entry->cp_key_revision = key_revision; + error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE); + } + } + + /* + * Bail out if: + * a) error was not ENOATTR (we got something bad from the getxattr call) + * b) we encountered an error setting the xattr above. + * c) we failed to generate a new cprotect data structure. + */ + if (error) { + goto out; + } + + cp->c_cpentry = entry; + +out: + if (error == 0) { + entry->cp_backing_cnode = cp; + } + else { + if (entry) { + cp_entry_destroy(hfsmp, entry); + } + cp->c_cpentry = NULL; + } + + return error; +} + +/* + * cp_setup_newentry + * + * Generate a keyless cprotect structure for use with the new AppleKeyStore kext. + * Since the kext is now responsible for vending us both wrapped/unwrapped keys + * we need to create a keyless xattr upon file / directory creation. When we have the inode value + * and the file/directory is established, then we can ask it to generate keys. Note that + * this introduces a potential race; If the device is locked and the wrapping + * keys are purged between the time we call this function and the time we ask it to generate + * keys for us, we could have to fail the open(2) call and back out the entry. + */ + +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, + cp_key_class_t suppliedclass, mode_t cmode, + struct cprotect **tmpentry) +{ + int isdir = 0; + struct cprotect *entry = NULL; + uint32_t target_class = hfsmp->default_cp_class; + suppliedclass = CP_CLASS(suppliedclass); + + if (hfsmp->hfs_running_cp_major_vers == 0) { + panic ("CP: major vers not set in mount!"); + } + + if (S_ISDIR (cmode)) { + isdir = 1; + } + + /* Decide the target class. Input argument takes priority. */ + if (cp_is_valid_class (isdir, suppliedclass)) { + /* caller supplies -1 if it was not specified so we will default to the mount point value */ + target_class = suppliedclass; + /* + * One exception, F is never valid for a directory + * because its children may inherit and userland will be + * unable to read/write to the files. 
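+ *
+ * Concretely (illustrative cases, following the selection logic below):
+ *
+ *  + caller supplies class A for a new file: the file gets class A.
+ *  + caller supplies class F for a new directory: rejected with EINVAL.
+ *  + no valid class supplied, parent directory is class C: the new file or
+ *    directory inherits class C.
+ *  + no valid class supplied, parent is DIR_NONE: a new directory stays
+ *    DIR_NONE, but a new file falls back to the mount's default class.
+ *  + no valid class supplied and no usable parent class: the mount's default
+ *    class (hfsmp->default_cp_class) is used.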
+ */ + if (isdir) { + if (target_class == PROTECTION_CLASS_F) { + *tmpentry = NULL; + return EINVAL; + } + } + } + else { + /* + * If no valid class was supplied, behave differently depending on whether or not + * the item being created is a file or directory. + * + * for FILE: + * If parent directory has a non-zero class, use that. + * If parent directory has a zero class (not set), then attempt to + * apply the mount point default. + * + * for DIRECTORY: + * Directories always inherit from the parent; if the parent + * has a NONE class set, then we can continue to use that. + */ + if ((dcp) && (dcp->c_cpentry)) { + uint32_t parentclass = CP_CLASS(dcp->c_cpentry->cp_pclass); + /* If the parent class is not valid, default to the mount point value */ + if (cp_is_valid_class(1, parentclass)) { + if (isdir) { + target_class = parentclass; + } + else if (parentclass != PROTECTION_CLASS_DIR_NONE) { + /* files can inherit so long as it's not NONE */ + target_class = parentclass; + } + } + /* Otherwise, we already defaulted to the mount point's default */ + } + } + + /* Generate the cprotect to vend out */ + entry = cp_entry_alloc(NULL, 0, 0, NULL); + if (entry == NULL) { + *tmpentry = NULL; + return ENOMEM; + } + + /* + * We don't have keys yet, so fill in what we can. At this point + * this blob has no keys and it has no backing xattr. We just know the + * target class. + */ + entry->cp_flags = CP_NO_XATTR; + /* Note this is only the effective class */ + entry->cp_pclass = target_class; + *tmpentry = entry; + + return 0; +} + +/* + * Set up an initial key/class pair for a disassociated cprotect entry. + * This function is used to generate transient keys that will never be + * written to disk. We use class F for this since it provides the exact + * semantics that are needed here. Because we never attach this blob to + * a cnode directly, we take a pointer to the cprotect struct. + * + * This function is primarily used in the HFS FS truncation codepath + * where we may rely on AES symmetry to relocate encrypted data from + * one spot in the disk to another. + */ +int cpx_gentempkeys(cpx_t *pcpx, __unused struct hfsmount *hfsmp) +{ + cpx_t cpx = cpx_alloc(CP_MAX_KEYSIZE); + + cpx_set_key_len(cpx, CP_MAX_KEYSIZE); + read_random(cpx_key(cpx), CP_MAX_KEYSIZE); + cpx_set_use_offset_for_iv(cpx, true); + + *pcpx = cpx; + + return 0; +} + +/* + * Tear down and clear a cprotect blob for a closing file. + * Called at hfs_reclaim_cnode: cnode is locked exclusive. + */ +void +cp_entry_destroy(hfsmount_t *hfsmp, struct cprotect *entry_ptr) +{ + if (entry_ptr == NULL) { + /* nothing to clean up */ + return; + } + cp_entry_dealloc(hfsmp, entry_ptr); +} + + +int +cp_fs_protected (mount_t mnt) +{ + return (vfs_flags(mnt) & MNT_CPROTECT); +} + + +/* + * Return a pointer to underlying cnode if there is one for this vnode. + * Done without taking cnode lock, inspecting only vnode state. + */ +struct cnode * +cp_get_protected_cnode(struct vnode *vp) +{ + if (!cp_vnode_is_eligible(vp)) { + return NULL; + } + + if (!cp_fs_protected(VTOVFS(vp))) { + /* mount point doesn't support it */ + return NULL; + } + + return vnode_fsnode(vp); +} + + +/* + * Sets *class to persistent class associated with vnode, + * or returns error. + */ +int +cp_vnode_getclass(struct vnode *vp, cp_key_class_t *class) +{ + struct cprotect *entry; + int error = 0; + struct cnode *cp; + int took_truncate_lock = 0; + struct hfsmount *hfsmp = NULL; + + /* Is this an interesting vp? 
*/ + if (!cp_vnode_is_eligible (vp)) { + return EBADF; + } + + /* Is the mount point formatted for content protection? */ + if (!cp_fs_protected(VTOVFS(vp))) { + return ENOTSUP; + } + + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + + /* + * Take the truncate lock up-front in shared mode because we may need + * to manipulate the CP blob. Pend lock events until we're done here. + */ + hfs_lock_truncate (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + /* + * We take only the shared cnode lock up-front. If it turns out that + * we need to manipulate the CP blob to write a key out, drop the + * shared cnode lock and acquire an exclusive lock. + */ + error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (error) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return error; + } + + /* pull the class from the live entry */ + entry = cp->c_cpentry; + + if (entry == NULL) { + panic("Content Protection: uninitialized cnode %p", cp); + } + + /* Note that we may not have keys yet, but we know the target class. */ + + if (error == 0) { + *class = CP_CLASS(entry->cp_pclass); + } + + if (took_truncate_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + + hfs_unlock(cp); + return error; +} + +/* + * Sets persistent class for this file or directory. + * If vnode cannot be protected (system file, non-regular file, non-hfs), EBADF. + * If the new class can't be accessed now, EPERM. + * Otherwise, record class and re-wrap key if the mount point is content-protected. + */ +int +cp_vnode_setclass(struct vnode *vp, cp_key_class_t newclass) +{ + struct cnode *cp; + struct cprotect *entry = 0; + int error = 0; + int took_truncate_lock = 0; + struct hfsmount *hfsmp = NULL; + int isdir = 0; + + if (vnode_isdir (vp)) { + isdir = 1; + } + + /* Ensure we only use the effective class here */ + newclass = CP_CLASS(newclass); + + if (!cp_is_valid_class(isdir, newclass)) { + printf("hfs: CP: cp_setclass called with invalid class %d\n", newclass); + return EINVAL; + } + + /* Is this an interesting vp? */ + if (!cp_vnode_is_eligible(vp)) { + return EBADF; + } + + /* Is the mount point formatted for content protection? */ + if (!cp_fs_protected(VTOVFS(vp))) { + return ENOTSUP; + } + + hfsmp = VTOHFS(vp); + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + + /* + * Take the cnode truncate lock exclusive because we want to manipulate the + * CP blob. The lock-event handling code is doing the same. This also forces + * all pending IOs to drain before we can re-write the persistent and cache keys. + */ + cp = VTOC(vp); + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + /* + * The truncate lock is not sufficient to guarantee the CP blob + * isn't being used. We must wait for existing writes to finish. + */ + vnode_waitforwrites(vp, 0, 0, 0, "cp_vnode_setclass"); + + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) { + return EINVAL; + } + + entry = cp->c_cpentry; + if (entry == NULL) { + error = EINVAL; + goto out; + } + + /* + * re-wrap per-file key with new class. + * Generate an entirely new key if switching to F. + */ + if (vnode_isreg(vp)) { + /* + * The vnode is a file. Before proceeding with the re-wrap, we need + * to unwrap the keys before proceeding. This is to ensure that + * the destination class's properties still work appropriately for the + * target class (since B allows I/O but an unwrap prior to the next unlock + * will not be allowed). 
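+ *
+ * For example (an illustrative reading of the code that follows): if the
+ * cached key is not resident, cp_restore_keys() is called first, so that
+ * even when the file ends up in a class whose key cannot be unwrapped again
+ * until the next unlock (class B while the device is locked being the
+ * canonical case), the in-core key survives the re-wrap and any I/O that is
+ * still permitted keeps working.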
+ */ + if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) { + error = cp_restore_keys (entry, hfsmp, cp); + if (error) { + goto out; + } + } + + if (newclass == PROTECTION_CLASS_F) { + /* Verify that file is blockless if switching to class F */ + if (cp->c_datafork->ff_size > 0) { + error = EINVAL; + goto out; + } + + cp_key_pair_t *cpkp; + cprotect_t new_entry = cp_entry_alloc(NULL, 0, CP_MAX_KEYSIZE, &cpkp); + + if (!new_entry) { + error = ENOMEM; + goto out; + } + + /* newclass is only the effective class */ + new_entry->cp_pclass = newclass; + new_entry->cp_key_os_version = cp_os_version(); + new_entry->cp_key_revision = cp_next_key_revision(entry->cp_key_revision); + + cpx_t cpx = cpkp_cpx(cpkp); + + /* Class F files are not wrapped, so they continue to use MAX_KEYSIZE */ + cpx_set_key_len(cpx, CP_MAX_KEYSIZE); + read_random (cpx_key(cpx), CP_MAX_KEYSIZE); + + cp_replace_entry(hfsmp, cp, new_entry); + + error = 0; + goto out; + } + + /* Deny the setclass if file is to be moved from F to something else */ + if (entry->cp_pclass == PROTECTION_CLASS_F) { + error = EPERM; + goto out; + } + + if (!cpkp_has_pers_key(&entry->cp_keys)) { + struct cprotect *new_entry = NULL; + /* + * We want to fail if we can't wrap to the target class. By not setting + * CP_KEYWRAP_DIFFCLASS, we tell keygeneration that if it can't wrap + * to 'newclass' then error out. + */ + uint32_t flags = 0; + error = cp_generate_keys (hfsmp, cp, newclass, flags, &new_entry); + if (error == 0) { + cp_replace_entry (hfsmp, cp, new_entry); + } + /* Bypass the setxattr code below since generate_keys does it for us */ + goto out; + } + + cprotect_t new_entry; + error = cp_rewrap(cp, hfsmp, &newclass, &entry->cp_keys, entry, + (cp_new_alloc_fn)cp_entry_alloc, (void **)&new_entry); + if (error) { + /* we didn't have perms to set this class. leave file as-is and error out */ + goto out; + } + +#if HFS_CONFIG_KEY_ROLL + hfs_cp_key_roll_ctx_t *new_key_roll_ctx = NULL; + if (entry->cp_key_roll_ctx) { + error = cp_rewrap(cp, hfsmp, &newclass, &entry->cp_key_roll_ctx->ckr_keys, + entry->cp_key_roll_ctx, + (cp_new_alloc_fn)hfs_key_roll_ctx_alloc, + (void **)&new_key_roll_ctx); + + if (error) { + cp_entry_dealloc(hfsmp, new_entry); + goto out; + } + + new_entry->cp_key_roll_ctx = new_key_roll_ctx; + } +#endif + + new_entry->cp_pclass = newclass; + + cp_replace_entry(hfsmp, cp, new_entry); + entry = new_entry; + } + else if (vnode_isdir(vp)) { + /* For directories, just update the pclass. newclass is only effective class */ + entry->cp_pclass = newclass; + error = 0; + } + else { + /* anything else, just error out */ + error = EINVAL; + goto out; + } + + /* + * We get here if the new class was F, or if we were re-wrapping a cprotect that already + * existed. If the keys were never generated, then they'll skip the setxattr calls. + */ + + error = cp_setxattr(cp, cp->c_cpentry, VTOHFS(vp), 0, XATTR_REPLACE); + if (error == ENOATTR) { + error = cp_setxattr(cp, cp->c_cpentry, VTOHFS(vp), 0, XATTR_CREATE); + } + +out: + + if (took_truncate_lock) { + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + } + hfs_unlock(cp); + return error; +} + + +int cp_vnode_transcode(vnode_t vp, cp_key_t *k) +{ + struct cnode *cp; + struct cprotect *entry = 0; + int error = 0; + int took_truncate_lock = 0; + struct hfsmount *hfsmp = NULL; + + /* Structures passed between HFS and AKS */ + struct aks_cred_s access_in; + struct aks_wrapped_key_s wrapped_key_in, wrapped_key_out; + + /* Is this an interesting vp? 
*/ + if (!cp_vnode_is_eligible(vp)) { + return EBADF; + } + + /* Is the mount point formatted for content protection? */ + if (!cp_fs_protected(VTOVFS(vp))) { + return ENOTSUP; + } + + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + + /* + * Take the cnode truncate lock exclusive because we want to manipulate the + * CP blob. The lock-event handling code is doing the same. This also forces + * all pending IOs to drain before we can re-write the persistent and cache keys. + */ + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) { + return EINVAL; + } + + entry = cp->c_cpentry; + if (entry == NULL) { + error = EINVAL; + goto out; + } + + /* Send the per-file key in wrapped form for re-wrap with the current class information + * Send NULLs in the output parameters of the wrapper() and AKS will do the rest. + * Don't need to process any outputs, so just clear the locks and pass along the error. */ + if (vnode_isreg(vp)) { + + /* Picked up the following from cp_wrap(). + * If needed, more comments available there. */ + + if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { + error = EINVAL; + goto out; + } + + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); + + cp_key_pair_t *cpkp = &entry->cp_keys; + +#if HFS_CONFIG_KEY_ROLL + if (entry->cp_key_roll_ctx) + cpkp = &entry->cp_key_roll_ctx->ckr_keys; +#endif + + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_pers_key_len(cpkp); + + if (!wrapped_key_in.key_len) { + error = EINVAL; + goto out; + } + + /* Use the actual persistent class when talking to AKS */ + wrapped_key_in.dp_class = entry->cp_pclass; + wrapped_key_out.key = k->key; + wrapped_key_out.key_len = k->len; + + error = hfs_backup_key(&access_in, + &wrapped_key_in, + &wrapped_key_out); + + if(error) + error = EPERM; + else + k->len = wrapped_key_out.key_len; + } + +out: + if (took_truncate_lock) { + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + } + hfs_unlock(cp); + return error; +} + + +/* + * Check permission for the given operation (read, write) on this node. + * Additionally, if the node needs work, do it: + * - create a new key for the file if one hasn't been set before + * - write out the xattr if it hasn't already been saved + * - unwrap the key if needed + * + * Takes cnode lock, and upgrades to exclusive if modifying cprotect. + * + * Note that this function does *NOT* take the cnode truncate lock. This is because + * the thread calling us may already have the truncate lock. It is not necessary + * because either we successfully finish this function before the keys are tossed + * and the IO will fail, or the keys are tossed and then this function will fail. + * Either way, the cnode lock still ultimately guards the keys. We only rely on the + * truncate lock to protect us against tossing the keys as a cluster call is in-flight. + */ +int +cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) +{ + struct cprotect *entry; + int error = 0; + struct hfsmount *hfsmp = NULL; + struct cnode *cp = NULL; + + /* + * First, do validation against the vnode before proceeding any further: + * Is this vnode originating from a valid content-protected filesystem ? + */ + if (cp_vnode_is_eligible(vp) == 0) { + /* + * It is either not HFS or not a file/dir. Just return success. 
This is a valid + * case if servicing i/o against another filesystem type from VFS + */ + return 0; + } + + if (cp_fs_protected (VTOVFS(vp)) == 0) { + /* + * The underlying filesystem does not support content protection. This is also + * a valid case. Simply return success. + */ + return 0; + } + + /* + * At this point, we know we have a HFS vnode that backs a file or directory on a + * filesystem that supports content protection + */ + cp = VTOC(vp); + + if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return error; + } + + entry = cp->c_cpentry; + + if (entry == NULL) { + /* + * If this cnode is not content protected, simply return success. + * Note that this function is called by all I/O-based call sites + * when CONFIG_PROTECT is enabled during XNU building. + */ + + /* + * All files should have cprotect structs. It's possible to encounter + * a directory from a V2.0 CP system but all files should have protection + * EAs + */ + if (vnode_isreg(vp)) { + error = EPERM; + } + + goto out; + } + + vp = CTOV(cp, 0); + if (vp == NULL) { + /* is it a rsrc */ + vp = CTOV(cp,1); + if (vp == NULL) { + error = EINVAL; + goto out; + } + } + hfsmp = VTOHFS(vp); + + if ((error = cp_check_access(cp, hfsmp, vnop))) { + /* check for raw encrypted access before bailing out */ + if ((ioflag & IO_ENCRYPTED) +#if HFS_CONFIG_KEY_ROLL + // If we're rolling, we need the keys + && !hfs_is_key_rolling(cp) +#endif + && (vnop == CP_READ_ACCESS)) { + /* + * read access only + asking for the raw encrypted bytes + * is legitimate, so reset the error value to 0 + */ + error = 0; + } + else { + goto out; + } + } + + if (!ISSET(entry->cp_flags, CP_NO_XATTR)) { + if (!S_ISREG(cp->c_mode)) + goto out; + + // If we have a persistent key and the cached key, we're done + if (!cp_needs_pers_key(entry) + && cpx_has_key(cpkp_cpx(&entry->cp_keys))) { + goto out; + } + } + + /* upgrade to exclusive lock */ + if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return error; + } + } else { + cp->c_lockowner = current_thread(); + } + + /* generate new keys if none have ever been saved */ + if (cp_needs_pers_key(entry)) { + struct cprotect *newentry = NULL; + /* + * It's ok if this ends up being wrapped in a different class than 'pclass'. + * class modification is OK here. 
+ */ + uint32_t flags = CP_KEYWRAP_DIFFCLASS; + + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); + if (error == 0) { + cp_replace_entry (hfsmp, cp, newentry); + entry = newentry; + } + else { + goto out; + } + } + + /* unwrap keys if needed */ + if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) { + if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) { + /* no need to try to restore keys; they are not going to be used */ + error = 0; + } + else { + error = cp_restore_keys(entry, hfsmp, cp); + if (error) { + goto out; + } + } + } + + /* write out the xattr if it's new */ + if (entry->cp_flags & CP_NO_XATTR) + error = cp_setxattr(cp, entry, VTOHFS(cp->c_vp), 0, XATTR_CREATE); + +out: + + hfs_unlock(cp); + return error; +} + +#if HFS_TMPDBG +#if !SECURE_KERNEL +static void cp_log_eperm (struct vnode* vp, int pclass, boolean_t create) { + char procname[256] = {}; + const char *fname = "unknown"; + const char *dbgop = "open"; + + int ppid = proc_selfpid(); + /* selfname does a strlcpy so we're OK */ + proc_selfname(procname, sizeof(procname)); + if (vp && vp->v_name) { + /* steal from the namecache */ + fname = vp->v_name; + } + + if (create) { + dbgop = "create"; + } + + printf("proc %s (pid %d) class %d, op: %s failure @ file %s\n", procname, ppid, pclass, dbgop, fname); +} +#endif +#endif + + +int +cp_handle_open(struct vnode *vp, int mode) +{ + struct cnode *cp = NULL ; + struct cprotect *entry = NULL; + struct hfsmount *hfsmp; + int error = 0; + + /* If vnode not eligible, just return success */ + if (!cp_vnode_is_eligible(vp)) { + return 0; + } + + /* If mount point not properly set up, then also return success */ + if (!cp_fs_protected(VTOVFS(vp))) { + return 0; + } + + cp = VTOC(vp); + + // Allow if raw encrypted mode requested + if (ISSET(mode, FENCRYPTED)) { +#if HFS_CONFIG_KEY_ROLL + // If we're rolling, we need the keys + hfs_lock_always(cp, HFS_SHARED_LOCK); + bool rolling = hfs_is_key_rolling(cp); + hfs_unlock(cp); + if (!rolling) + return 0; +#else + return 0; +#endif + } + if (ISSET(mode, FUNENCRYPTED)) { + return 0; + } + + /* We know the vnode is in a valid state. Acquire cnode and validate */ + hfsmp = VTOHFS(vp); + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return error; + } + + entry = cp->c_cpentry; + if (entry == NULL) { + /* + * If the mount is protected and we couldn't get a cprotect for this vnode, + * then it's not valid for opening. + */ + if (vnode_isreg(vp)) { + error = EPERM; + } + goto out; + } + + if (!S_ISREG(cp->c_mode)) + goto out; + + /* + * Does the cnode have keys yet? If not, then generate them. + */ + if (cp_needs_pers_key(entry)) { + struct cprotect *newentry = NULL; + /* Allow the keybag to override our class preferences */ + uint32_t flags = CP_KEYWRAP_DIFFCLASS; + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), flags, &newentry); + if (error == 0) { + cp_replace_entry (hfsmp, cp, newentry); + entry = newentry; + } + else { + goto out; + } + } + + /* + * We want to minimize the number of unwraps that we'll have to do since + * the cost can vary, depending on the platform we're running. + */ + switch (CP_CLASS(entry->cp_pclass)) { + case PROTECTION_CLASS_B: + if (mode & O_CREAT) { + /* + * Class B always allows creation. Since O_CREAT was passed through + * we infer that this was a newly created vnode/cnode. Even though a potential + * race exists when multiple threads attempt to create/open a particular + * file, only one can "win" and actually create it. 
VFS will unset the + * O_CREAT bit on the loser. + * + * Note that skipping the unwrap check here is not a security issue -- + * we have to unwrap the key permanently upon the first I/O. + */ + break; + } + + if (cpx_has_key(cpkp_cpx(&entry->cp_keys)) && !ISSET(mode, FENCRYPTED)) { + /* + * For a class B file, attempt the unwrap if we have the key in + * core already. + * The device could have just transitioned into the lock state, and + * this vnode may not yet have been purged from the vnode cache (which would + * remove the keys). + */ + struct aks_cred_s access_in; + struct aks_wrapped_key_s wrapped_key_in; + + cp_init_access(&access_in, cp); + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = cpkp_pers_key(&entry->cp_keys); + wrapped_key_in.key_len = cpkp_pers_key_len(&entry->cp_keys); + /* Use the persistent class when talking to AKS */ + wrapped_key_in.dp_class = entry->cp_pclass; + error = hfs_unwrap_key(&access_in, &wrapped_key_in, NULL); + if (error) { + error = EPERM; + } + break; + } + /* otherwise, fall through to attempt the unwrap/restore */ + case PROTECTION_CLASS_A: + case PROTECTION_CLASS_C: + /* + * At this point, we know that we need to attempt an unwrap if needed; we want + * to make sure that open(2) fails properly if the device is either just-locked + * or never made it past first unlock. Since the keybag serializes access to the + * unwrapping keys for us and only calls our VFS callback once they've been purged, + * we will get here in two cases: + * + * A) we're in a window before the wrapping keys are purged; this is OK since when they get + * purged, the vnode will get flushed if needed. + * + * B) The keys are already gone. In this case, the restore_keys call below will fail. + * + * Since this function is bypassed entirely if we're opening a raw encrypted file, + * we can always attempt the restore. + */ + if (!cpx_has_key(cpkp_cpx(&entry->cp_keys))) { + error = cp_restore_keys(entry, hfsmp, cp); + } + + if (error) { + error = EPERM; + } + + break; + + case PROTECTION_CLASS_D: + default: + break; + } + +out: + +#if HFS_TMPDBG +#if !SECURE_KERNEL + if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) { + cp_log_eperm (vp, CP_CLASS(entry->cp_pclass), false); + } +#endif +#endif + + hfs_unlock(cp); + return error; +} + + +/* + * cp_getrootxattr: + * Gets the EA we set on the root folder (fileid 1) to get information about the + * version of Content Protection that was used to write to this filesystem. + * Note that all multi-byte fields are written to disk little endian so they must be + * converted to native endian-ness as needed. + */ +int +cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) +{ + void *buf; + + /* + * We allow for an extra 64 bytes to cater for upgrades. This wouldn't + * be necessary if the xattr routines just returned what we asked for.
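+ *
+ * As a worked example (the 40-byte figure is hypothetical, not the actual
+ * sizeof): if sizeof(struct cp_root_xattr) were 40 bytes, the allocation
+ * below would be roundup(40 + 64, 64) = 128 bytes, i.e. the requested size
+ * rounded up with 64 bytes of headroom for newer, larger on-disk layouts.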
+ */ + size_t bufsize = roundup(sizeof(struct cp_root_xattr) + 64, 64); + + int error = 0; + + hfs_assert(outxattr); + + buf = hfs_malloc(bufsize); + + uio_t uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + + uio_addiov(uio, CAST_USER_ADDR_T(buf), bufsize); + + size_t attrsize = bufsize; + + struct vnop_getxattr_args args = { + .a_uio = uio, + .a_name = CONTENT_PROTECTION_XATTR_NAME, + .a_size = &attrsize + }; + + error = hfs_getxattr_internal(NULL, &args, hfsmp, 1); + + uio_free(uio); + + if (error != 0) { + goto out; + } + + if (attrsize < CP_ROOT_XATTR_MIN_LEN) { + error = HFS_EINCONSISTENT; + goto out; + } + + const struct cp_root_xattr *xattr = buf; + + bzero(outxattr, sizeof(*outxattr)); + + /* Now convert the multi-byte fields to native endianness */ + outxattr->major_version = OSSwapLittleToHostInt16(xattr->major_version); + outxattr->minor_version = OSSwapLittleToHostInt16(xattr->minor_version); + outxattr->flags = OSSwapLittleToHostInt64(xattr->flags); + + if (outxattr->major_version >= CP_VERS_5) { + if (attrsize < sizeof(struct cp_root_xattr)) { + error = HFS_EINCONSISTENT; + goto out; + } +#if HFS_CONFIG_KEY_ROLL + outxattr->auto_roll_min_version = OSSwapLittleToHostInt32(xattr->auto_roll_min_version); + outxattr->auto_roll_max_version = OSSwapLittleToHostInt32(xattr->auto_roll_max_version); +#endif + } + +out: + hfs_free(buf, bufsize); + return error; +} + +/* + * cp_setrootxattr: + * Sets the EA we set on the root folder (fileid 1) to get information about the + * version of Content Protection that was used to write to this filesystem. + * Note that all multi-byte fields are written to disk little endian so they must be + * converted to little endian as needed. + * + * This will be written to the disk when it detects the EA is not there, or when we need + * to make a modification to the on-disk version that can be done in-place. + */ +int +cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) +{ + int error = 0; + struct vnop_setxattr_args args; + + args.a_desc = NULL; + args.a_vp = NULL; + args.a_name = CONTENT_PROTECTION_XATTR_NAME; + args.a_uio = NULL; //pass data ptr instead + args.a_options = 0; + args.a_context = NULL; //no context needed, only done from mount. + + const uint64_t flags = newxattr->flags; + + /* Now convert the multi-byte fields to little endian before writing to disk. 
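+ * For example, a major_version of 5 is stored on disk as the 16-bit
+ * little-endian byte sequence 05 00 regardless of host byte order; on a
+ * big-endian host OSSwapHostToLittleInt16() performs the byte swap and on a
+ * little-endian host it is a no-op.  cp_getrootxattr() undoes the conversion
+ * with the matching OSSwapLittleToHostInt* calls when the xattr is read back.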
*/ + newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags); + + int xattr_size = sizeof(struct cp_root_xattr); + +#if HFS_CONFIG_KEY_ROLL + bool upgraded = false; + + if (newxattr->auto_roll_min_version || newxattr->auto_roll_max_version) { + if (newxattr->major_version < CP_VERS_5) { + printf("hfs: upgrading to cp version %u\n", CP_CURRENT_VERS); + + newxattr->major_version = CP_CURRENT_VERS; + newxattr->minor_version = CP_MINOR_VERS; + + upgraded = true; + } + + newxattr->auto_roll_min_version = OSSwapHostToLittleInt32(newxattr->auto_roll_min_version); + newxattr->auto_roll_max_version = OSSwapHostToLittleInt32(newxattr->auto_roll_max_version); + } else if (newxattr->major_version == CP_VERS_4) + xattr_size = offsetof(struct cp_root_xattr, auto_roll_min_version); +#endif + + newxattr->major_version = OSSwapHostToLittleInt16(newxattr->major_version); + newxattr->minor_version = OSSwapHostToLittleInt16(newxattr->minor_version); + + error = hfs_setxattr_internal(NULL, (caddr_t)newxattr, + xattr_size, &args, hfsmp, 1); + + if (!error) { + hfsmp->cproot_flags = flags; +#if HFS_CONFIG_KEY_ROLL + if (upgraded) + hfsmp->hfs_running_cp_major_vers = CP_CURRENT_VERS; +#endif + } + + return error; +} + + +/* + * Stores new xattr data on the cnode. + * cnode lock held exclusive (if available). + * + * This function is also invoked during file creation. + */ +int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, + uint32_t fileid, int options) +{ + int error = 0; + cp_key_pair_t *cpkp = &entry->cp_keys; +#if HFS_CONFIG_KEY_ROLL + bool rolling = entry->cp_key_roll_ctx != NULL; + + if (rolling && entry->cp_key_roll_ctx->ckr_off_rsrc == INT64_MAX) { + // We've finished rolling, but we still have the context + rolling = false; + cpkp = &entry->cp_key_roll_ctx->ckr_keys; + } +#endif + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + + if (hfsmp->hfs_running_cp_major_vers < CP_CURRENT_VERS) { + // Upgrade + printf("hfs: upgrading to cp version %u\n", CP_CURRENT_VERS); + + struct cp_root_xattr root_xattr; + + error = cp_getrootxattr(hfsmp, &root_xattr); + if (error) + return error; + + root_xattr.major_version = CP_CURRENT_VERS; + root_xattr.minor_version = CP_MINOR_VERS; + + error = cp_setrootxattr(hfsmp, &root_xattr); + if (error) + return error; + + hfsmp->hfs_running_cp_major_vers = CP_CURRENT_VERS; + } + + struct cp_xattr_v5 *xattr; + xattr = hfs_malloc(sizeof(*xattr)); + + xattr->xattr_major_version = OSSwapHostToLittleConstInt16(CP_VERS_5); + xattr->xattr_minor_version = OSSwapHostToLittleConstInt16(CP_MINOR_VERS); + xattr->flags = 0; +#if HFS_CONFIG_KEY_ROLL + if (rolling) + xattr->flags |= CP_XAF_KEY_ROLLING; +#endif + xattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); + xattr->key_os_version = OSSwapHostToLittleInt32(entry->cp_key_os_version); + xattr->key_revision = OSSwapHostToLittleInt16(entry->cp_key_revision); + + uint16_t key_len = cpkp_pers_key_len(cpkp); + xattr->key_len = OSSwapHostToLittleInt16(key_len); + memcpy(xattr->persistent_key, cpkp_pers_key(cpkp), key_len); + + size_t xattr_len = offsetof(struct cp_xattr_v5, persistent_key) + key_len; + +#if HFS_CONFIG_KEY_ROLL + if (rolling) { + struct cp_roll_info *roll_info = PTR_ADD(struct cp_roll_info *, xattr, xattr_len); + + roll_info->off_rsrc = OSSwapHostToLittleInt64(entry->cp_key_roll_ctx->ckr_off_rsrc); + + key_len = cpkp_pers_key_len(&entry->cp_key_roll_ctx->ckr_keys); + roll_info->key_len = OSSwapHostToLittleInt16(key_len); + + memcpy(roll_info->key, 
cpkp_pers_key(&entry->cp_key_roll_ctx->ckr_keys), key_len); + + xattr_len += offsetof(struct cp_roll_info, key) + key_len; + } +#endif + + struct vnop_setxattr_args args = { + .a_vp = cp ? cp->c_vp : NULL, + .a_name = CONTENT_PROTECTION_XATTR_NAME, + .a_options = options, + .a_context = vfs_context_current(), + }; + + error = hfs_setxattr_internal(cp, xattr, xattr_len, &args, hfsmp, fileid); + + hfs_free(xattr, sizeof(*xattr)); + + if (error == 0 ) { + entry->cp_flags &= ~CP_NO_XATTR; + } + + return error; +} + +/* + * Used by an fcntl to query the underlying FS for its content protection version # + */ + +int +cp_get_root_major_vers(vnode_t vp, uint32_t *level) +{ + int err = 0; + struct hfsmount *hfsmp = NULL; + struct mount *mp = NULL; + + mp = VTOVFS(vp); + + /* check if it supports content protection */ + if (cp_fs_protected(mp) == 0) { + return ENOTSUP; + } + + hfsmp = VFSTOHFS(mp); + /* figure out the level */ + + err = cp_root_major_vers(mp); + + if (err == 0) { + *level = hfsmp->hfs_running_cp_major_vers; + } + /* in error case, cp_root_major_vers will just return EINVAL. Use that */ + + return err; +} + +/* Used by fcntl to query default protection level of FS */ +int cp_get_default_level (struct vnode *vp, uint32_t *level) { + int err = 0; + struct hfsmount *hfsmp = NULL; + struct mount *mp = NULL; + + mp = VTOVFS(vp); + + /* check if it supports content protection */ + if (cp_fs_protected(mp) == 0) { + return ENOTSUP; + } + + hfsmp = VFSTOHFS(mp); + /* figure out the default */ + + *level = hfsmp->default_cp_class; + return err; +} + +/******************** + * Private Functions + *******************/ + +static int +cp_root_major_vers(mount_t mp) +{ + int err = 0; + struct cp_root_xattr xattr; + struct hfsmount *hfsmp = NULL; + + hfsmp = vfs_fsprivate(mp); + err = cp_getrootxattr (hfsmp, &xattr); + + if (err == 0) { + hfsmp->hfs_running_cp_major_vers = xattr.major_version; + } + else { + return EINVAL; + } + + return 0; +} + +static int +cp_vnode_is_eligible(struct vnode *vp) +{ + return !vnode_issystem(vp) && (vnode_isreg(vp) || vnode_isdir(vp)); +} + +#if DEBUG +static const uint32_t cp_magic1 = 0x7b727063; // cpr{ +static const uint32_t cp_magic2 = 0x7270637d; // }cpr +#endif + +struct cprotect * +cp_entry_alloc(cprotect_t old, uint16_t pers_key_len, + uint16_t cached_key_len, cp_key_pair_t **pcpkp) +{ + struct cprotect *cp_entry; + + if (pers_key_len > CP_MAX_WRAPPEDKEYSIZE) + return (NULL); + + size_t size = (sizeof(struct cprotect) - sizeof(cp_key_pair_t) + + cpkp_size(pers_key_len, cached_key_len)); + +#if DEBUG + size += 4; // Extra for magic2 +#endif + + cp_entry = hfs_malloc(size); + + if (old) { + memcpy(cp_entry, old, offsetof(struct cprotect, cp_keys)); + +#if HFS_CONFIG_KEY_ROLL + // We don't copy the key roll context + cp_entry->cp_key_roll_ctx = NULL; +#endif + } else { + bzero(cp_entry, offsetof(struct cprotect, cp_keys)); + } + +#if DEBUG + cp_entry->cp_magic1 = cp_magic1; + *PTR_ADD(uint32_t *, cp_entry, size - 4) = cp_magic2; +#endif + + cpkp_init(&cp_entry->cp_keys, pers_key_len, cached_key_len); + + /* + * If we've been passed the old entry, then we are in the process of + * rewrapping in which case we need to copy the cached key. This is + * important for class B files when the device is locked because we + * won't be able to unwrap whilst in this state, yet we still need the + * unwrapped key. 
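+ *
+ * A typical re-wrap therefore passes the old entry so the cached key is
+ * carried across; as a sketch (lengths illustrative, names hypothetical):
+ *
+ *     cp_key_pair_t *cpkp;
+ *     cprotect_t new_entry = cp_entry_alloc(old_entry, new_pers_key_len,
+ *                                           CP_MAX_CACHEBUFLEN, &cpkp);
+ *
+ * whereas a brand-new, keyless entry is allocated as
+ * cp_entry_alloc(NULL, 0, 0, NULL), as cp_setup_newentry() does above.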
+	 */
+	if (old)
+		cpx_copy(cpkp_cpx(&old->cp_keys), cpkp_cpx(&cp_entry->cp_keys));
+
+	if (pcpkp)
+		*pcpkp = &cp_entry->cp_keys;
+
+	return cp_entry;
+}
+
+static void
+cp_entry_dealloc(__unused hfsmount_t *hfsmp, struct cprotect *entry)
+{
+#if HFS_CONFIG_KEY_ROLL
+	hfs_release_key_roll_ctx(hfsmp, entry);
+#endif
+
+	cpkp_flush(&entry->cp_keys);
+
+	size_t entry_size = (sizeof(struct cprotect) - sizeof(cp_key_pair_t)
+						 + cpkp_sizex(&entry->cp_keys));
+
+#if DEBUG
+	hfs_assert(entry->cp_magic1 == cp_magic1);
+	hfs_assert(*PTR_ADD(uint32_t *, entry, sizeof(struct cprotect) - sizeof(cp_key_pair_t)
+		 + cpkp_sizex(&entry->cp_keys)) == cp_magic2);
+
+	entry_size += 4; // Extra for magic2
+#endif
+
+	hfs_free(entry, entry_size);
+}
+
+static int cp_read_xattr_v4(__unused hfsmount_t *hfsmp, struct cp_xattr_v4 *xattr,
+							size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options)
+{
+	/* Endian swap the multi-byte fields into host endianness from L.E. */
+	xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version);
+	xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version);
+	xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size);
+	xattr->flags = OSSwapLittleToHostInt32(xattr->flags);
+	xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class);
+	xattr->key_os_version = OSSwapLittleToHostInt32(xattr->key_os_version);
+
+	/*
+	 * Prevent a buffer overflow, and validate the key length obtained from the
+	 * EA. If it's too big, then bail out, because the EA can't be trusted at this
+	 * point.
+	 */
+	if (xattr->key_size > CP_MAX_WRAPPEDKEYSIZE)
+		return HFS_EINCONSISTENT;
+
+	size_t min_len = offsetof(struct cp_xattr_v4, persistent_key) + xattr->key_size;
+	if (xattr_len < min_len)
+		return HFS_EINCONSISTENT;
+
+	/*
+	 * Class F files have no backing key; their keylength should be 0,
+	 * though they should have the proper flags set.
+	 *
+	 * A request to instantiate a CP for a class F file should result
+	 * in a bzero'd cp that just says class F, with key_flushed set.
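+	 *
+	 * (That is why key_size is forced to 0 below for class F files and for
+	 * files marked CP_XAF_NEEDS_KEYS: no key bytes are copied out of the EA.)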
+ */ + if (CP_CLASS(xattr->persistent_class) == PROTECTION_CLASS_F + || ISSET(xattr->flags, CP_XAF_NEEDS_KEYS)) { + xattr->key_size = 0; + } + + /* set up entry with information from xattr */ + cp_key_pair_t *cpkp; + cprotect_t entry; + + if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + /* caller passed in a pre-allocated structure to get the basic info */ + entry = *pcpr; + bzero(entry, offsetof(struct cprotect, cp_keys)); + } + else { + entry = cp_entry_alloc(NULL, xattr->key_size, CP_MAX_CACHEBUFLEN, &cpkp); + } + + entry->cp_pclass = xattr->persistent_class; + entry->cp_key_os_version = xattr->key_os_version; + + + if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + if (xattr->key_size) { + cpkp_set_pers_key_len(cpkp, xattr->key_size); + memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_size); + } + + *pcpr = entry; + } + else if (xattr->key_size) { + SET(entry->cp_flags, CP_HAS_A_KEY); + } + + return 0; +} + +int cp_read_xattr_v5(hfsmount_t *hfsmp, struct cp_xattr_v5 *xattr, + size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options) +{ + if (xattr->xattr_major_version == OSSwapHostToLittleConstInt16(CP_VERS_4)) { + return cp_read_xattr_v4(hfsmp, (struct cp_xattr_v4 *)xattr, xattr_len, pcpr, options); + } + + xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); + + if (xattr->xattr_major_version != CP_VERS_5) { + printf("hfs: cp_getxattr: unsupported xattr version %d\n", + xattr->xattr_major_version); + return ENOTSUP; + } + + size_t min_len = offsetof(struct cp_xattr_v5, persistent_key); + + if (xattr_len < min_len) + return HFS_EINCONSISTENT; + + xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); + xattr->flags = OSSwapLittleToHostInt32(xattr->flags); + xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); + xattr->key_os_version = OSSwapLittleToHostInt32(xattr->key_os_version); + xattr->key_revision = OSSwapLittleToHostInt16(xattr->key_revision); + xattr->key_len = OSSwapLittleToHostInt16(xattr->key_len); + + uint16_t pers_key_len = xattr->key_len; + + min_len += pers_key_len; + if (xattr_len < min_len) + return HFS_EINCONSISTENT; + +#if HFS_CONFIG_KEY_ROLL + struct cp_roll_info *roll_info = NULL; + + if (ISSET(xattr->flags, CP_XAF_KEY_ROLLING)) { + roll_info = PTR_ADD(struct cp_roll_info *, xattr, min_len); + + min_len += offsetof(struct cp_roll_info, key); + + if (xattr_len < min_len) + return HFS_EINCONSISTENT; + + roll_info->off_rsrc = OSSwapLittleToHostInt64(roll_info->off_rsrc); + + if (roll_info->off_rsrc % hfsmp->blockSize) + return HFS_EINCONSISTENT; + + roll_info->key_len = OSSwapLittleToHostInt16(roll_info->key_len); + + min_len += roll_info->key_len; + if (xattr_len < min_len) + return HFS_EINCONSISTENT; + } +#endif + + cp_key_pair_t *cpkp; + cprotect_t entry; + + /* + * If option CP_GET_XATTR_BASIC_INFO is set, we only return basic + * information about the file's protection (and not the key) and + * we store the result in the structure the caller passed to us. 
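+	 *
+	 * (In that mode only cp_pclass, cp_key_os_version, cp_key_revision and
+	 * the CP_HAS_A_KEY / CP_KEY_IS_ROLLING flags are filled in; no key
+	 * material is copied into the caller's structure.)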
+ */ + if (ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + entry = *pcpr; + bzero(entry, offsetof(struct cprotect, cp_keys)); +#if HFS_CONFIG_KEY_ROLL + if (ISSET(xattr->flags, CP_XAF_KEY_ROLLING)) { + SET(entry->cp_flags, CP_KEY_IS_ROLLING); + } +#endif + } else { + entry = cp_entry_alloc(NULL, xattr->key_len, CP_MAX_CACHEBUFLEN, &cpkp); + } + + entry->cp_pclass = xattr->persistent_class; + entry->cp_key_os_version = xattr->key_os_version; + entry->cp_key_revision = xattr->key_revision; + + if (!ISSET(options, CP_GET_XATTR_BASIC_INFO)) { + if (xattr->key_len) { + cpkp_set_pers_key_len(cpkp, xattr->key_len); + memcpy(cpkp_pers_key(cpkp), xattr->persistent_key, xattr->key_len); + } + +#if HFS_CONFIG_KEY_ROLL + if (roll_info) { + entry->cp_key_roll_ctx = hfs_key_roll_ctx_alloc(NULL, roll_info->key_len, + CP_MAX_CACHEBUFLEN, &cpkp); + + entry->cp_key_roll_ctx->ckr_off_rsrc = roll_info->off_rsrc; + + if (roll_info->key_len) { + cpkp_set_pers_key_len(cpkp, roll_info->key_len); + memcpy(cpkp_pers_key(cpkp), roll_info->key, roll_info->key_len); + } + } +#endif + + *pcpr = entry; + } + else if (xattr->key_len) { + SET(entry->cp_flags, CP_HAS_A_KEY); + } + + return 0; +} + +/* + * Initializes a new cprotect entry with xattr data from the cnode. + * cnode lock held shared + */ +static int +cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, cprotect_t *outentry) +{ + size_t xattr_len; + struct cp_xattr_v5 *xattr; + + xattr = hfs_malloc(xattr_len = sizeof(*xattr)); + + int error = hfs_xattr_read(cp->c_vp, CONTENT_PROTECTION_XATTR_NAME, + xattr, &xattr_len); + + if (!error) { + if (xattr_len < CP_XATTR_MIN_LEN) + error = HFS_EINCONSISTENT; + else + error = cp_read_xattr_v5(hfsmp, xattr, xattr_len, outentry, 0); + } + +#if DEBUG + if (error && error != ENOATTR) { + printf("cp_getxattr: bad cp xattr (%d):\n", error); + for (size_t i = 0; i < xattr_len; ++i) + printf("%02x ", ((uint8_t *)xattr)[i]); + printf("\n"); + } +#endif + + hfs_free(xattr, sizeof(*xattr)); + + return error; +} + +/* + * If permitted, restore entry's unwrapped key from the persistent key. + * If not, clear key and set CP_KEY_FLUSHED. + * cnode lock held exclusive + */ +static int +cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp, struct cnode *cp) +{ + int error = 0; + + error = cp_unwrap(hfsmp, entry, cp); + if (error) { + cp_flush_cached_keys(entry); + error = EPERM; + } + return error; +} + +void cp_device_locked_callback(mount_t mp, cp_lock_state_t state) +{ + struct hfsmount *hfsmp; + + /* + * When iterating the various mount points that may + * be present on a content-protected device, we need to skip + * those that do not have it enabled. + */ + if (!cp_fs_protected(mp)) { + return; + } + + hfsmp = VFSTOHFS(mp); + + hfsmp->hfs_cp_lock_state = state; + + if (state == CP_LOCKED_STATE) { + /* + * We respond only to lock events. Since cprotect structs + * decrypt/restore keys lazily, the unlock events don't + * actually cause anything to happen. + */ + vnode_iterate(mp, 0, cp_lock_vnode_callback, (void *)(uintptr_t)state); + } +} + +/* + * Deny access to protected files if keys have been locked. + */ +static int +cp_check_access(struct cnode *cp, struct hfsmount *hfsmp, int vnop __unused) +{ + int error = 0; + + /* + * For now it's OK to examine the state variable here without + * holding the HFS lock. This is only a short-circuit; if the state + * transitions (or is in transition) after we examine this field, we'd + * have to handle that anyway. 
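+	 *
+	 * (Also note that only regular files in class A are actually denied
+	 * below; every other class falls through to the default case and is
+	 * allowed.)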
+ */ + if (hfsmp->hfs_cp_lock_state == CP_UNLOCKED_STATE) { + return 0; + } + + if (!cp->c_cpentry) { + /* unprotected node */ + return 0; + } + + if (!S_ISREG(cp->c_mode)) { + return 0; + } + + /* Deny all access for class A files */ + switch (CP_CLASS(cp->c_cpentry->cp_pclass)) { + case PROTECTION_CLASS_A: { + error = EPERM; + break; + } + default: + error = 0; + break; + } + + return error; +} + +/* + * Respond to a lock or unlock event. + * On lock: clear out keys from memory, then flush file contents. + * On unlock: nothing (function not called). + */ +static int +cp_lock_vnode_callback(struct vnode *vp, void *arg) +{ + cnode_t *cp = NULL; + struct cprotect *entry = NULL; + int error = 0; + int locked = 1; + unsigned long action = 0; + int took_truncate_lock = 0; + + error = vnode_getwithref (vp); + if (error) { + return error; + } + + cp = VTOC(vp); + + /* + * When cleaning cnodes due to a lock event, we must + * take the truncate lock AND the cnode lock. By taking + * the truncate lock here, we force (nearly) all pending IOs + * to drain before we can acquire the truncate lock. All HFS cluster + * io calls except for swapfile IO need to acquire the truncate lock + * prior to calling into the cluster layer. + */ + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + entry = cp->c_cpentry; + if (!entry) { + /* unprotected vnode: not a regular file */ + goto out; + } + + action = (unsigned long) arg; + switch (action) { + case CP_LOCKED_STATE: { + vfs_context_t ctx; + if (CP_CLASS(entry->cp_pclass) != PROTECTION_CLASS_A || + vnode_isdir(vp)) { + /* + * There is no change at lock for other classes than A. + * B is kept in memory for writing, and class F (for VM) does + * not have a wrapped key, so there is no work needed for + * wrapping/unwrapping. + * + * Note that 'class F' is relevant here because if + * hfs_vnop_strategy does not take the cnode lock + * to protect the cp blob across IO operations, we rely + * implicitly on the truncate lock to be held when doing IO. + * The only case where the truncate lock is not held is during + * swapfile IO because HFS just funnels the VNOP_PAGEOUT + * directly to cluster_pageout. + */ + goto out; + } + + /* Before doing anything else, zero-fill sparse ranges as needed */ + ctx = vfs_context_current(); + (void) hfs_filedone (vp, ctx, 0); + + /* first, sync back dirty pages */ + hfs_unlock (cp); + ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC); + hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + /* flush keys: + * There was a concern here(9206856) about flushing keys before nand layer is done using them. + * But since we are using ubc_msync with UBC_SYNC, it blocks until all IO is completed. + * Once IOFS caches or is done with these keys, it calls the completion routine in IOSF. + * Which in turn calls buf_biodone() and eventually unblocks ubc_msync() + * Also verified that the cached data in IOFS is overwritten by other data, and there + * is no key leakage in that layer. + */ + + cp_flush_cached_keys(entry); + + /* some write may have arrived in the mean time. 
dump those pages */ + hfs_unlock(cp); + locked = 0; + + ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_INVALIDATE | UBC_SYNC); + break; + } + case CP_UNLOCKED_STATE: { + /* no-op */ + break; + } + default: + panic("Content Protection: unknown lock action %lu\n", action); + } + +out: + if (locked) { + hfs_unlock(cp); + } + + if (took_truncate_lock) { + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + } + + vnode_put (vp); + return error; +} + + +/* + * cp_rewrap: + * + * Generate a new wrapped key based on the existing cache key. + */ + +int +cp_rewrap(struct cnode *cp, __unused hfsmount_t *hfsmp, + cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder, + cp_new_alloc_fn alloc_fn, void **pholder) +{ + struct cprotect *entry = cp->c_cpentry; + + uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; + unsigned keylen = CP_MAX_WRAPPEDKEYSIZE; + int error = 0; + const cp_key_class_t key_class = CP_CLASS(*newclass); + + /* Structures passed between HFS and AKS */ + struct aks_cred_s access_in; + struct aks_wrapped_key_s wrapped_key_in; + struct aks_wrapped_key_s wrapped_key_out; + + /* + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * key that is only good as long as the file is open. There is no + * wrapped key, so there isn't anything to wrap. + */ + if (key_class == PROTECTION_CLASS_F) { + return EINVAL; + } + + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_pers_key_len(cpkp); + /* Use the persistent class when talking to AKS */ + wrapped_key_in.dp_class = entry->cp_pclass; + + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); + wrapped_key_out.key = new_persistent_key; + wrapped_key_out.key_len = keylen; + + /* + * inode is passed here to find the backup bag wrapped blob + * from userspace. This lookup will occur shortly after creation + * and only if the file still exists. Beyond this lookup the + * inode is not used. Technically there is a race, we practically + * don't lose. + */ + error = hfs_rewrap_key(&access_in, + key_class, /* new class */ + &wrapped_key_in, + &wrapped_key_out); + + keylen = wrapped_key_out.key_len; + + if (error == 0) { + /* + * Verify that AKS returned to us a wrapped key of the + * target class requested. + */ + /* Get the effective class here */ + cp_key_class_t effective = CP_CLASS(wrapped_key_out.dp_class); + if (effective != key_class) { + /* + * Fail the operation if defaults or some other enforcement + * dictated that the class be wrapped differently. 
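+			 * We return EPERM rather than silently keeping the class that
+			 * AKS chose.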
+ */ + + /* TODO: Invalidate the key when 12170074 unblocked */ + return EPERM; + } + + /* Allocate a new cpentry */ + cp_key_pair_t *new_cpkp; + *pholder = alloc_fn(old_holder, keylen, CP_MAX_CACHEBUFLEN, &new_cpkp); + + /* copy the new key into the entry */ + cpkp_set_pers_key_len(new_cpkp, keylen); + memcpy(cpkp_pers_key(new_cpkp), new_persistent_key, keylen); + + /* Actually record/store what AKS reported back, not the effective class stored in newclass */ + *newclass = wrapped_key_out.dp_class; + } + else { + error = EPERM; + } + + return error; +} + +static int cpkp_unwrap(cnode_t *cp, cp_key_class_t key_class, cp_key_pair_t *cpkp) +{ + int error = 0; + uint8_t iv_key[CP_IV_KEYSIZE]; + cpx_t cpx = cpkp_cpx(cpkp); + + /* Structures passed between HFS and AKS */ + struct aks_cred_s access_in; + struct aks_wrapped_key_s wrapped_key_in; + struct aks_raw_key_s key_out; + + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = cpkp_pers_key(cpkp); + wrapped_key_in.key_len = cpkp_max_pers_key_len(cpkp); + /* Use the persistent class when talking to AKS */ + wrapped_key_in.dp_class = key_class; + + bzero(&key_out, sizeof(key_out)); + key_out.iv_key = iv_key; + key_out.key = cpx_key(cpx); + /* + * The unwrapper should validate/set the key length for + * the IV key length and the cache key length, however we need + * to supply the correct buffer length so that AKS knows how + * many bytes it has to work with. + */ + key_out.iv_key_len = CP_IV_KEYSIZE; + key_out.key_len = cpx_max_key_len(cpx); + + error = hfs_unwrap_key(&access_in, &wrapped_key_in, &key_out); + if (!error) { + if (key_out.key_len == 0 || key_out.key_len > CP_MAX_CACHEBUFLEN) { + panic ("cp_unwrap: invalid key length! (%ul)\n", key_out.key_len); + } + + if (key_out.iv_key_len != CP_IV_KEYSIZE) + panic ("cp_unwrap: invalid iv key length! (%ul)\n", key_out.iv_key_len); + + cpx_set_key_len(cpx, key_out.key_len); + + cpx_set_aes_iv_key(cpx, iv_key); + cpx_set_is_sep_wrapped_key(cpx, ISSET(key_out.flags, AKS_RAW_KEY_WRAPPEDKEY)); + } else { + error = EPERM; + } + + return error; +} + +static int +cp_unwrap(__unused struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) +{ + /* + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * key that is only good as long as the file is open. There is no + * wrapped key, so there isn't anything to unwrap. + */ + if (CP_CLASS(entry->cp_pclass) == PROTECTION_CLASS_F) { + return EPERM; + } + + int error = cpkp_unwrap(cp, entry->cp_pclass, &entry->cp_keys); + +#if HFS_CONFIG_KEY_ROLL + if (!error && entry->cp_key_roll_ctx) { + error = cpkp_unwrap(cp, entry->cp_pclass, &entry->cp_key_roll_ctx->ckr_keys); + if (error) + cpx_flush(cpkp_cpx(&entry->cp_keys)); + } +#endif + + return error; +} + +/* + * cp_generate_keys + * + * Take a cnode that has already been initialized and establish persistent and + * cache keys for it at this time. Note that at the time this is called, the + * directory entry has already been created and we are holding the cnode lock + * on 'cp'. + * + */ +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, cp_key_class_t targetclass, + uint32_t keyflags, struct cprotect **newentry) +{ + + int error = 0; + struct cprotect *newcp = NULL; + *newentry = NULL; + + /* Target class must be an effective class only */ + targetclass = CP_CLASS(targetclass); + + /* Validate that it has a cprotect already */ + if (cp->c_cpentry == NULL) { + /* We can't do anything if it shouldn't be protected. 
*/ + return 0; + } + + /* Asserts for the underlying cprotect */ + if (cp->c_cpentry->cp_flags & CP_NO_XATTR) { + /* should already have an xattr by this point. */ + error = EINVAL; + goto out; + } + + if (S_ISREG(cp->c_mode)) { + if (!cp_needs_pers_key(cp->c_cpentry)) { + error = EINVAL; + goto out; + } + } + + cp_key_revision_t key_revision = cp_initial_key_revision(hfsmp); + + error = cp_new (&targetclass, hfsmp, cp, cp->c_mode, keyflags, key_revision, + (cp_new_alloc_fn)cp_entry_alloc, (void **)&newcp); + if (error) { + /* + * Key generation failed. This is not necessarily fatal + * since the device could have transitioned into the lock + * state before we called this. + */ + error = EPERM; + goto out; + } + + newcp->cp_pclass = targetclass; + newcp->cp_key_os_version = cp_os_version(); + newcp->cp_key_revision = key_revision; + + /* + * If we got here, then we have a new cprotect. + * Attempt to write the new one out. + */ + error = cp_setxattr (cp, newcp, hfsmp, cp->c_fileid, XATTR_REPLACE); + + if (error) { + /* Tear down the new cprotect; Tell MKB that it's invalid. Bail out */ + /* TODO: rdar://12170074 needs to be fixed before we can tell MKB */ + if (newcp) { + cp_entry_destroy(hfsmp, newcp); + } + goto out; + } + + /* + * If we get here then we can assert that: + * 1) generated wrapped/unwrapped keys. + * 2) wrote the new keys to disk. + * 3) cprotect is ready to go. + */ + + *newentry = newcp; + +out: + return error; + +} + +void cp_replace_entry (hfsmount_t *hfsmp, struct cnode *cp, struct cprotect *newentry) +{ + if (cp->c_cpentry) { +#if HFS_CONFIG_KEY_ROLL + // Transfer the tentative reservation + if (cp->c_cpentry->cp_key_roll_ctx && newentry->cp_key_roll_ctx) { + newentry->cp_key_roll_ctx->ckr_tentative_reservation + = cp->c_cpentry->cp_key_roll_ctx->ckr_tentative_reservation; + + cp->c_cpentry->cp_key_roll_ctx->ckr_tentative_reservation = NULL; + } +#endif + + cp_entry_destroy (hfsmp, cp->c_cpentry); + } + cp->c_cpentry = newentry; + newentry->cp_backing_cnode = cp; + + return; +} + + +/* + * cp_new + * + * Given a double-pointer to a cprotect, generate keys (either in-kernel or from keystore), + * allocate a cprotect, and vend it back to the caller. + * + * Additionally, decide if keys are even needed -- directories get cprotect data structures + * but they do not have keys. + * + */ + +int +cp_new(cp_key_class_t *newclass_eff, __unused struct hfsmount *hfsmp, struct cnode *cp, + mode_t cmode, int32_t keyflags, cp_key_revision_t key_revision, + cp_new_alloc_fn alloc_fn, void **pholder) +{ + int error = 0; + uint8_t new_key[CP_MAX_CACHEBUFLEN]; + unsigned new_key_len = CP_MAX_CACHEBUFLEN; /* AKS tell us the proper key length, how much of this is used */ + uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; + unsigned new_persistent_len = CP_MAX_WRAPPEDKEYSIZE; + uint8_t iv_key[CP_IV_KEYSIZE]; + unsigned iv_key_len = CP_IV_KEYSIZE; + int iswrapped = 0; + cp_key_class_t key_class = CP_CLASS(*newclass_eff); + + /* Structures passed between HFS and AKS */ + struct aks_cred_s access_in; + struct aks_wrapped_key_s wrapped_key_out; + struct aks_raw_key_s key_out; + + /* Sanity check that it's a file or directory here */ + if (!(S_ISREG(cmode)) && !(S_ISDIR(cmode))) { + return EPERM; + } + + /* + * Step 1: Generate Keys if needed. + * + * For class F files, the kernel provides the key. + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * key that is only good as long as the file is open. There is no + * wrapped key, so there isn't anything to wrap. 
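+	 * (Below, the class F key is simply filled from the kernel RNG via
+	 * read_random() and new_persistent_len stays 0.)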
+ * + * For class A->D files, the key store provides the key + * + * For Directories, we only give them a class ; no keys. + */ + if (S_ISDIR (cmode)) { + /* Directories */ + new_persistent_len = 0; + new_key_len = 0; + + error = 0; + } + else { + /* Must be a file */ + if (key_class == PROTECTION_CLASS_F) { + /* class F files are not wrapped; they can still use the max key size */ + new_key_len = CP_MAX_KEYSIZE; + read_random (&new_key[0], new_key_len); + new_persistent_len = 0; + + error = 0; + } + else { + /* + * The keystore is provided the file ID so that it can associate + * the wrapped backup blob with this key from userspace. This + * lookup occurs after successful file creation. Beyond this, the + * file ID is not used. Note that there is a potential race here if + * the file ID is re-used. + */ + cp_init_access(&access_in, cp); + + bzero(&key_out, sizeof(key_out)); + key_out.key = new_key; + key_out.iv_key = iv_key; + /* + * AKS will override our key length fields, but we need to supply + * the length of the buffer in those length fields so that + * AKS knows hoa many bytes it has to work with. + */ + key_out.key_len = new_key_len; + key_out.iv_key_len = iv_key_len; + + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); + wrapped_key_out.key = new_persistent_key; + wrapped_key_out.key_len = new_persistent_len; + + access_in.key_revision = key_revision; + + error = hfs_new_key(&access_in, + key_class, + &key_out, + &wrapped_key_out); + + if (error) { + /* keybag returned failure */ + error = EPERM; + goto cpnew_fail; + } + + /* Now sanity-check the output from new_key */ + if (key_out.key_len == 0 || key_out.key_len > CP_MAX_CACHEBUFLEN) { + panic ("cp_new: invalid key length! (%ul) \n", key_out.key_len); + } + + if (key_out.iv_key_len != CP_IV_KEYSIZE) { + panic ("cp_new: invalid iv key length! (%ul) \n", key_out.iv_key_len); + } + + /* + * AKS is allowed to override our preferences and wrap with a + * different class key for policy reasons. If we were told that + * any class other than the one specified is unacceptable then error out + * if that occurred. Check that the effective class returned by + * AKS is the same as our effective new class + */ + if (CP_CLASS(wrapped_key_out.dp_class) != key_class) { + if (!ISSET(keyflags, CP_KEYWRAP_DIFFCLASS)) { + error = EPERM; + /* TODO: When 12170074 fixed, release/invalidate the key! */ + goto cpnew_fail; + } + } + + *newclass_eff = wrapped_key_out.dp_class; + new_key_len = key_out.key_len; + iv_key_len = key_out.iv_key_len; + new_persistent_len = wrapped_key_out.key_len; + + /* Is the key a SEP wrapped key? */ + if (key_out.flags & AKS_RAW_KEY_WRAPPEDKEY) { + iswrapped = 1; + } + } + } + + /* + * Step 2: allocate cprotect and initialize it. + */ + + cp_key_pair_t *cpkp; + *pholder = alloc_fn(NULL, new_persistent_len, new_key_len, &cpkp); + if (*pholder == NULL) { + return ENOMEM; + } + + /* Copy the cache key & IV keys into place if needed. 
*/ + if (new_key_len > 0) { + cpx_t cpx = cpkp_cpx(cpkp); + + cpx_set_key_len(cpx, new_key_len); + memcpy(cpx_key(cpx), new_key, new_key_len); + + /* Initialize the IV key */ + if (key_class != PROTECTION_CLASS_F) + cpx_set_aes_iv_key(cpx, iv_key); + + cpx_set_is_sep_wrapped_key(cpx, iswrapped); + } + if (new_persistent_len > 0) { + cpkp_set_pers_key_len(cpkp, new_persistent_len); + memcpy(cpkp_pers_key(cpkp), new_persistent_key, new_persistent_len); + } + +cpnew_fail: + +#if HFS_TMPDBG +#if !SECURE_KERNEL + if ((hfsmp->hfs_cp_verbose) && (error == EPERM)) { + /* Only introspect the data fork */ + cp_log_eperm (cp->c_vp, *newclass_eff, true); + } +#endif +#endif + + return error; +} + + +/* Initialize the aks_cred_t structure passed to AKS */ +static void cp_init_access(aks_cred_t access, struct cnode *cp) +{ + vfs_context_t context = vfs_context_current(); + kauth_cred_t cred = vfs_context_ucred(context); + proc_t proc = vfs_context_proc(context); + struct hfsmount *hfsmp; + struct vnode *vp; + uuid_t hfs_uuid; + + bzero(access, sizeof(*access)); + + vp = CTOV(cp, 0); + if (vp == NULL) { + /* is it a rsrc */ + vp = CTOV(cp,1); + if (vp == NULL) { + //leave the struct bzeroed. + return; + } + } + + hfsmp = VTOHFS(vp); + hfs_getvoluuid(hfsmp, hfs_uuid); + + /* Note: HFS uses 32-bit fileID, even though inode is a 64-bit value */ + access->inode = cp->c_fileid; + access->pid = proc_pid(proc); + access->uid = kauth_cred_getuid(cred); + uuid_copy (access->volume_uuid, hfs_uuid); + + if (cp->c_cpentry) + access->key_revision = cp->c_cpentry->cp_key_revision; + + return; +} + +#if HFS_CONFIG_KEY_ROLL + +errno_t cp_set_auto_roll(hfsmount_t *hfsmp, + const hfs_key_auto_roll_args_t *args) +{ + // 64 bytes should be OK on the stack + _Static_assert(sizeof(struct cp_root_xattr) < 64, "cp_root_xattr too big!"); + + struct cp_root_xattr xattr; + errno_t ret; + + ret = cp_getrootxattr(hfsmp, &xattr); + if (ret) + return ret; + + ret = hfs_start_transaction(hfsmp); + if (ret) + return ret; + + xattr.auto_roll_min_version = args->min_key_os_version; + xattr.auto_roll_max_version = args->max_key_os_version; + + bool roll_old_class_gen = ISSET(args->flags, HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION); + + if (roll_old_class_gen) + SET(xattr.flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION); + else + CLR(xattr.flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION); + + ret = cp_setrootxattr(hfsmp, &xattr); + + errno_t ret2 = hfs_end_transaction(hfsmp); + + if (!ret) + ret = ret2; + + if (ret) + return ret; + + hfs_lock_mount(hfsmp); + hfsmp->hfs_auto_roll_min_key_os_version = args->min_key_os_version; + hfsmp->hfs_auto_roll_max_key_os_version = args->max_key_os_version; + hfs_unlock_mount(hfsmp); + + return ret; +} + +bool cp_should_auto_roll(hfsmount_t *hfsmp, cprotect_t cpr) +{ + if (cpr->cp_key_roll_ctx) { + // Already rolling + return false; + } + + // Only automatically roll class A, B & C + if (CP_CLASS(cpr->cp_pclass) < PROTECTION_CLASS_A + || CP_CLASS(cpr->cp_pclass) > PROTECTION_CLASS_C) { + return false; + } + + if (!cpkp_has_pers_key(&cpr->cp_keys)) + return false; + + /* + * Remember, the class generation stored in HFS+ is updated at the *end*, + * so it's old if it matches the generation we have stored. 
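+	 *
+	 * (So a match below means the file still has a key from the old
+	 * generation and should be rolled.)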
+ */ + if (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION) + && cp_get_crypto_generation(cpr->cp_pclass) == hfsmp->cp_crypto_generation) { + return true; + } + + if (!hfsmp->hfs_auto_roll_min_key_os_version + && !hfsmp->hfs_auto_roll_max_key_os_version) { + // No minimum or maximum set + return false; + } + + if (hfsmp->hfs_auto_roll_min_key_os_version + && cpr->cp_key_os_version < hfsmp->hfs_auto_roll_min_key_os_version) { + // Before minimum + return false; + } + + if (hfsmp->hfs_auto_roll_max_key_os_version + && cpr->cp_key_os_version >= hfsmp->hfs_auto_roll_max_key_os_version) { + // Greater than maximum + return false; + } + + return true; +} + +#endif // HFS_CONFIG_KEY_ROLL + +errno_t cp_handle_strategy(buf_t bp) +{ + vnode_t vp = buf_vnode(bp); + cnode_t *cp = NULL; + + if (bufattr_rawencrypted(buf_attr(bp)) + || !(cp = cp_get_protected_cnode(vp)) + || !cp->c_cpentry) { + // Nothing to do + return 0; + } + + /* + * For filesystem resize, we may not have access to the underlying + * file's cache key for whatever reason (device may be locked). + * However, we do not need it since we are going to use the + * temporary HFS-wide resize key which is generated once we start + * relocating file content. If this file's I/O should be done + * using the resize key, it will have been supplied already, so do + * not attach the file's cp blob to the buffer. + */ + if (ISSET(cp->c_cpentry->cp_flags, CP_RELOCATION_INFLIGHT)) + return 0; + +#if HFS_CONFIG_KEY_ROLL + /* + * We don't require any locks here. Pages will be locked so no + * key rolling can take place until this I/O has completed. + */ + if (!cp->c_cpentry->cp_key_roll_ctx) +#endif + { + // Fast path + cpx_t cpx = cpkp_cpx(&cp->c_cpentry->cp_keys); + + if (cpx_has_key(cpx)) { + bufattr_setcpx(buf_attr(bp), cpx); + return 0; + } + } + + /* + * We rely mostly (see note below) upon the truncate lock to + * protect the CP cache key from getting tossed prior to our IO + * finishing here. Nearly all cluster io calls to manipulate file + * payload from HFS take the truncate lock before calling into the + * cluster layer to ensure the file size does not change, or that + * they have exclusive right to change the EOF of the file. That + * same guarantee protects us here since the code that deals with + * CP lock events must now take the truncate lock before doing + * anything. + * + * If you want to change content protection structures, then the + * truncate lock is not sufficient; you must take the truncate + * lock and then wait for outstanding writes to complete. This is + * necessary because asynchronous I/O only holds the truncate lock + * whilst I/O is being queued. + * + * One exception should be the VM swapfile IO, because HFS will + * funnel the VNOP_PAGEOUT directly into a cluster_pageout call + * for the swapfile code only without holding the truncate lock. + * This is because individual swapfiles are maintained at + * fixed-length sizes by the VM code. In non-swapfile IO we use + * PAGEOUT_V2 semantics which allow us to create our own UPL and + * thus take the truncate lock before calling into the cluster + * layer. In that case, however, we are not concerned with the CP + * blob being wiped out in the middle of the IO because there + * isn't anything to toss; the VM swapfile key stays in-core as + * long as the file is open. 
+ */ + + off_rsrc_t off_rsrc = off_rsrc_make(buf_lblkno(bp) * GetLogicalBlockSize(vp), + VNODE_IS_RSRC(vp)); + cp_io_params_t io_params; + + + /* + * We want to take the cnode lock here and because the vnode write + * count is a pseudo-lock, we need to do something to preserve + * lock ordering; the cnode lock comes before the write count. + * Ideally, the write count would be incremented after the + * strategy routine returns, but that becomes complicated if the + * strategy routine where to call buf_iodone before returning. + * For now, we drop the write count here and then pick it up again + * later. + */ + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_writedone(vp); + + hfs_lock_always(cp, HFS_SHARED_LOCK); + cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc, + ISSET(buf_flags(bp), B_READ) ? VNODE_READ : VNODE_WRITE, + &io_params); + hfs_unlock(cp); + + /* + * Last chance: If this data protected I/O does not have unwrapped + * keys present, then try to get them. We already know that it + * should, by this point. + */ + if (!cpx_has_key(io_params.cpx)) { + int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); + errno_t error = cp_handle_vnop(vp, io_op, 0); + if (error) { + /* + * We have to be careful here. By this point in the I/O + * path, VM or the cluster engine has prepared a buf_t + * with the proper file offsets and all the rest, so + * simply erroring out will result in us leaking this + * particular buf_t. We need to properly decorate the + * buf_t just as buf_strategy would so as to make it + * appear that the I/O errored out with the particular + * error code. + */ + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_startwrite(vp); + buf_seterror (bp, error); + buf_biodone(bp); + return error; + } + + hfs_lock_always(cp, HFS_SHARED_LOCK); + cp_io_params(VTOHFS(vp), cp->c_cpentry, off_rsrc, + ISSET(buf_flags(bp), B_READ) ? VNODE_READ : VNODE_WRITE, + &io_params); + hfs_unlock(cp); + } + + hfs_assert(buf_count(bp) <= io_params.max_len); + bufattr_setcpx(buf_attr(bp), io_params.cpx); + + if (!ISSET(buf_flags(bp), B_READ) && !ISSET(buf_flags(bp), B_RAW)) + vnode_startwrite(vp); + + return 0; +} + +#endif /* CONFIG_PROTECT */ diff --git a/core/hfs_cprotect.h b/core/hfs_cprotect.h new file mode 100644 index 0000000..1b409e8 --- /dev/null +++ b/core/hfs_cprotect.h @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2009-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef HFS_CPROTECT_H_ +#define HFS_CPROTECT_H_ + +#if CONFIG_PROTECT + +#include + +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_fsctl.h" + +__BEGIN_DECLS + +#define CP_IV_KEYSIZE 16 /* 16x8 = 128 */ +#define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ +#define CP_MAX_CACHEBUFLEN 64 /* Maximum size of cp cache buffer/array */ + +#define CP_INITIAL_WRAPPEDKEYSIZE 40 +#define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ +#define CP_V4_RESERVEDBYTES 16 /* Number of reserved bytes in EA still present */ + +#define CP_LOCKED_KEYCHAIN 0 +#define CP_UNLOCKED_KEYCHAIN 1 + +#define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect" +#define CONTENT_PROTECTION_XATTR_NAME_CHARS \ + { 'c', 'o', 'm', '.', 'a', 'p', 'p', 'l', 'e', \ + '.', 's', 'y', 's', 't', 'e', 'm', \ + '.', 'c', 'p', 'r', 'o', 't', 'e', 'c', 't' } +#define CP_CURRENT_VERS CP_VERS_5 +#define CP_VERS_5 5 // iOS 8.1 +#define CP_VERS_4 4 // iOS 5 +#define CP_VERS_2 2 // iOS 4 +#define CP_MINOR_VERS 0 + +/* the class occupies the lowest 5 bits, so there are 32 values (0-31) */ +#define CP_EFFECTIVE_CLASSMASK 0x0000001f + +/* macros for quick access/typing to mask out the classmask */ +#define CP_CLASS(x) ((cp_key_class_t)(CP_EFFECTIVE_CLASSMASK & (x))) + +#define CP_CRYPTO_G1 0x00000020 + +typedef struct cp_xattr *cp_xattr_t; +typedef struct cnode * cnode_ptr_t; +//forward declare the struct. +struct hfsmount; + +/* + * Flags for Key Generation Behavior + * + * These are passed to cp_generate_keys() and cp_new() in the + * flags arguments + */ +#define CP_KEYWRAP_DIFFCLASS 0x00000001 /* wrapping with a different class bag is OK */ + +/* + * off_rsrc_t: this structure represents an offset and whether or not it's + * the resource fork. It's done this way so that we can easily do comparisons + * i.e. + * + * { 0, data-fork } < { 100, rsrc-fork } + */ + +enum { + OFF_RSRC_BIT = 0x4000000000000000, +}; + +typedef int64_t off_rsrc_t; + +static inline bool off_rsrc_is_rsrc(off_rsrc_t off_rsrc) +{ + return off_rsrc & OFF_RSRC_BIT; +} + +static inline off_t off_rsrc_get_off(off_rsrc_t off_rsrc) +{ + return off_rsrc & (OFF_RSRC_BIT - 1); +} + +static inline off_rsrc_t off_rsrc_make(off_t offset, bool is_rsrc) +{ + return offset | (is_rsrc ? OFF_RSRC_BIT : 0); +} + +// -- struct cp_key_pair -- + +/* + * This structure maintains the pair of keys; the persistent, wrapped key that + * is written to disk, and the unwrapped key (cpx_t) that we pass to lower + * layers. + */ + +typedef struct cp_key_pair { + uint16_t cpkp_max_pers_key_len; + uint16_t cpkp_pers_key_len; + uint8_t cpkp_cpx[]; + + // cpkp_cpx is variable length so the location of the persistent key varies + // uint8_t cpkp_persistent_key[]; +} cp_key_pair_t; + +// -- struct cprotect -- + +/* + * Runtime-only structure containing the content protection status for + * the given file. This is referenced by the cnode. It has the + * variable length key pair at the end. 
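+ *
+ * (See cp_entry_alloc()/cp_entry_dealloc() in hfs_cprotect.c: the structure
+ * is allocated with just enough space for cp_keys appended, and DEBUG builds
+ * bracket it with the cp_magic1/cp_magic2 guard words.)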
+ */ + +typedef uint32_t cp_flags_t; +enum { + CP_NO_XATTR = 0x01, /* Key info has not been saved as EA to the FS */ + CP_RELOCATION_INFLIGHT = 0x02, /* File with offset IVs is in the process of being relocated. */ + +#if HFS_CONFIG_KEY_ROLL + // These flags are only set if you ask for basic info from read_xattr_v5 + CP_KEY_IS_ROLLING = 0x04, /* File is in the middle of key rolling */ +#endif + CP_HAS_A_KEY = 0x08, /* File has a non-zero length key */ +}; + +typedef struct cprotect { +#if DEBUG + uint32_t cp_magic1; +#endif + cp_flags_t cp_flags; + cp_key_class_t cp_pclass; /* persistent class stored on-disk */ + void* cp_backing_cnode; + cp_key_os_version_t cp_key_os_version; + cp_key_revision_t cp_key_revision; + uint16_t cp_raw_open_count; +#if HFS_CONFIG_KEY_ROLL + struct hfs_cp_key_roll_ctx *cp_key_roll_ctx; +#endif + cp_key_pair_t cp_keys; // Variable length +} *cprotect_t; + +// -- On-Disk Structures -- + +typedef uint32_t cp_xattr_flags_t; +enum { + /* + * Be careful about using flags 0x02 to 0x20. Older code used to write + * flags that were used for in-memory purposes to disk and therefore + * they might be used in V4 structures. Here's what they were: + * + * CP_KEY_FLUSHED 0x02 Should never have made it to disk + * CP_NO_XATTR 0x04 Should never have made it to disk + * CP_OFF_IV_ENABLED 0x08 Probably made it to disk + * CP_RELOCATION_INFLIGHT 0x10 Should never have made it to disk + * CP_SEP_WRAPPEDKEY 0x20 Probably made it to disk + * + */ + + CP_XAF_NEEDS_KEYS = 0x0001, /* V4 only: file needs persistent keys */ + +}; + +/* + * V2 structure written as the per-file EA payload + * All on-disk multi-byte fields for the CP XATTR must be stored + * little-endian on-disk. This means they must be endian swapped to + * L.E on getxattr() and converted to LE on setxattr(). + * + * This structure is a fixed length and is tightly packed. + * 56 bytes total. + */ +struct cp_xattr_v2 { + u_int16_t xattr_major_version; + u_int16_t xattr_minor_version; + cp_xattr_flags_t flags; + u_int32_t persistent_class; + u_int32_t key_size; + uint8_t persistent_key[CP_V2_WRAPPEDKEYSIZE]; +} __attribute__((aligned(2), packed)); + + +/* + * V4 Content Protection EA On-Disk Layout. + * + * This structure must be tightly packed, but the *size can vary* + * depending on the length of the key. At MOST, the key length will be + * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field. + * + * Either way, the packing must be applied to ensure that the key data is + * retrievable in the right location relative to the start of the struct. + * + * Fully packed, this structure can range from : + * MIN: 36 bytes (no key -- used with directories) + * MAX: 164 bytes (with 128 byte key) + * + * During runtime we always allocate with the full 128 byte key, but only + * use as much of the key buffer as needed. It must be tightly packed, though. 
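+ *
+ * For example, a v4 EA carrying a 40-byte wrapped key is laid out as:
+ *
+ *   bytes  0..3    xattr_major_version, xattr_minor_version
+ *   bytes  4..7    flags
+ *   bytes  8..11   persistent_class
+ *   bytes 12..15   key_size (= 40)
+ *   bytes 16..19   key_os_version
+ *   bytes 20..35   reserved
+ *   bytes 36..75   persistent_key (40 of the possible 128 bytes)
+ *
+ * i.e. 36 fixed bytes + 40 key bytes = 76 bytes on disk.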
+ */ + +struct cp_xattr_v4 { + u_int16_t xattr_major_version; + u_int16_t xattr_minor_version; + cp_xattr_flags_t flags; + cp_key_class_t persistent_class; + u_int32_t key_size; + // This field will be zero on older systems + cp_key_os_version_t key_os_version; + /* CP V4 Reserved Bytes == 16 */ + u_int8_t reserved[CP_V4_RESERVEDBYTES]; + /* All above fields are fixed regardless of key length (36 bytes) */ + /* Max Wrapped Size == 128 */ + uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; +} __attribute__((aligned(2), packed)); + +// -- Version 5 -- + +#if HFS_CONFIG_KEY_ROLL +struct cp_roll_info { + off_rsrc_t off_rsrc; + uint16_t key_len; + uint8_t key[CP_MAX_WRAPPEDKEYSIZE]; +} __attribute__((aligned(2), packed)); +#endif + +struct cp_xattr_v5 { + uint16_t xattr_major_version; + uint16_t xattr_minor_version; + cp_xattr_flags_t flags; + cp_key_class_t persistent_class; + cp_key_os_version_t key_os_version; + cp_key_revision_t key_revision; + uint16_t key_len; + + // 20 bytes to here + + // Variable length from here + uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; + +#if HFS_CONFIG_KEY_ROLL + // NOTE: data not necessarily here because preceding is variable + uint8_t roll_key_[sizeof(struct cp_roll_info)]; +#endif + + // Wouldn't be necessary if xattr routines returned just what we ask for + uint8_t spare[512]; +} __attribute__((aligned(2), packed)); + +enum { + CP_XATTR_MIN_LEN = 20, // Minimum length for all versions +}; + +/* + * The Root Directory's EA (fileid 1) is special; it defines information about + * what capabilities the filesystem is using. + * + * The data is still stored little endian. + */ +struct cp_root_xattr { + u_int16_t major_version; + u_int16_t minor_version; + u_int64_t flags; +#if HFS_CONFIG_KEY_ROLL + cp_key_os_version_t auto_roll_min_version; + cp_key_os_version_t auto_roll_max_version; +#endif +} __attribute__((aligned(2), packed)); + +enum { + CP_ROOT_XATTR_MIN_LEN = 12, +}; + + +// -- Function Prototypes -- + +int cp_entry_init(cnode_ptr_t, struct mount *); +int cpx_gentempkeys(cpx_t *pcpx, struct hfsmount *hfsmp); +void cp_entry_destroy(struct hfsmount *hfsmp, struct cprotect *entry_ptr); +void cp_replace_entry (struct hfsmount *hfsmp, struct cnode *cp, struct cprotect *newentry); +cnode_ptr_t cp_get_protected_cnode(vnode_t); +int cp_fs_protected (mount_t); +int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr); +int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr); +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, + cp_key_class_t targetclass, uint32_t flags, + struct cprotect **newentry); +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, + cp_key_class_t suppliedclass, mode_t cmode, + struct cprotect **tmpentry); +int cp_is_valid_class (int isdir, int32_t protectionclass); +int cp_set_trimmed(struct hfsmount*); +int cp_set_rewrapped(struct hfsmount *); +int cp_flop_generation (struct hfsmount*); +bool cp_is_supported_version(uint16_t version); +int cp_vnode_getclass(struct vnode *vp, cp_key_class_t *class); +int cp_vnode_setclass(struct vnode *vp, cp_key_class_t newclass); +int cp_get_root_major_vers(vnode_t vp, uint32_t *level); +int cp_vnode_transcode(vnode_t vp, cp_key_t *k); +int cp_get_default_level (struct vnode *vp, uint32_t *level); +void cp_device_locked_callback(mount_t mp, cp_lock_state_t state); + +#if HFS_CONFIG_KEY_ROLL +bool cp_should_auto_roll(struct hfsmount *hfsmp, cprotect_t cpr); +errno_t cp_set_auto_roll(struct hfsmount *hfsmp, + const hfs_key_auto_roll_args_t 
*args); +#endif + +typedef struct cp_io_params { + // The key to use + cpx_t cpx; + + /* + * The physical offset for this I/O or -1 if unknown (i.e. caller must + * do a regular look up). + */ + off_t phys_offset; + + // The maximum length allowed for this I/O + off_t max_len; +} cp_io_params_t; + +// Return the I/O parameters for this I/O +void cp_io_params(struct hfsmount *hfsmp, cprotect_t cpr, off_rsrc_t off_rsrc, + int direction, cp_io_params_t *io_params); + +int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, + uint32_t fileid, int xattr_opts); + +typedef void * (* cp_new_alloc_fn)(const void *old, uint16_t pers_key_len, + uint16_t cached_key_len, + cp_key_pair_t **pcpkp); + +int cp_new(cp_key_class_t *newclass_eff, struct hfsmount *hfsmp, + struct cnode *cp, mode_t cmode, int32_t keyflags, + cp_key_revision_t key_revision, + cp_new_alloc_fn alloc_fn, void **pholder); + +int cp_rewrap(struct cnode *cp, __unused struct hfsmount *hfsmp, + cp_key_class_t *newclass, cp_key_pair_t *cpkp, const void *old_holder, + cp_new_alloc_fn alloc_fn, void **pholder); + +cprotect_t cp_entry_alloc(cprotect_t old, uint16_t pers_keylen, + uint16_t cached_key_len, cp_key_pair_t **pcpkp); + +cp_key_os_version_t cp_os_version(void); + +cp_key_revision_t cp_next_key_revision(cp_key_revision_t rev); + +typedef uint32_t cp_getxattr_options_t; +enum { + // Return just basic information (not the key) + CP_GET_XATTR_BASIC_INFO = 1, +}; + +int cp_read_xattr_v5(struct hfsmount *hfsmp, struct cp_xattr_v5 *xattr, + size_t xattr_len, cprotect_t *pcpr, cp_getxattr_options_t options); + + +errno_t cp_handle_strategy(buf_t bp); + +typedef enum { + CP_READ_ACCESS = 0x1, + CP_WRITE_ACCESS = 0x2 +} cp_mode_t; + +int cp_handle_open(struct vnode *vp, int mode); +int cp_handle_vnop(struct vnode *vp, int mode, int ioflag); + +// -- cp_key_pair_t functions -- + +size_t cpkp_size(uint16_t pers_key_len, uint16_t cached_key_len); +size_t cpkp_sizex(const cp_key_pair_t *cpkp); +void cpkp_init(cp_key_pair_t *cpkp, uint16_t max_pers_key_len, + uint16_t max_cached_key_len); +void cpkp_flush(cp_key_pair_t *cpkp); +void cpkp_copy(const cp_key_pair_t *src, cp_key_pair_t *dst); +uint16_t cpkp_max_pers_key_len(const cp_key_pair_t *cpkp); +uint16_t cpkp_pers_key_len(const cp_key_pair_t *cpkp); +bool cpkp_can_copy(const cp_key_pair_t *src, const cp_key_pair_t *dst); +cpx_t cpkp_cpx(const cp_key_pair_t *cpkp) __attribute__((pure)); + +// -- Helper Functions -- + +static inline int cp_get_crypto_generation (cp_key_class_t protclass) { + if (protclass & CP_CRYPTO_G1) { + return 1; + } + else return 0; +} + +__END_DECLS + +#endif // CONFIG_PROTECT + +#endif /* !HFS_CPROTECT_H_ */ diff --git a/core/hfs_dbg.h b/core/hfs_dbg.h new file mode 100644 index 0000000..eada538 --- /dev/null +++ b/core/hfs_dbg.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2000, 2005 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef HFS_DBG_H_ +#define HFS_DBG_H_ + +#include + +__BEGIN_DECLS + +#include + +// So that the analyzer acknowledges assertions... +#if defined(__clang_analyzer__) || DEBUG +#define panic_on_assert true +#else +extern bool panic_on_assert; +#endif + +#if DEBUG +extern bool hfs_corruption_panics; +#else +#define hfs_corruption_panics false +#endif + +__attribute__((noreturn)) +void hfs_assert_fail(const char *file, unsigned line, const char *expr); + +#define hfs_assert(expr) \ + do { \ + if (__builtin_expect(panic_on_assert, false) \ + && __builtin_expect(!(expr), false)) { \ + hfs_assert_fail(__FILE__, __LINE__, #expr); \ + } \ + } while (0) + +// On production, will printf rather than assert +#define hfs_warn(format, ...) \ + do { \ + if (__builtin_expect(panic_on_assert, false)) { \ + panic(format, ## __VA_ARGS__); \ + __builtin_unreachable(); \ + } else \ + printf(format, ## __VA_ARGS__); \ + } while (0) + +// Quiet on production +#define hfs_debug(format, ...) \ + do { \ + if (__builtin_expect(panic_on_assert, false)) \ + printf(format, ## __VA_ARGS__); \ + } while (0) + +// Panic on debug unless boot-arg tells us not to +#define hfs_corruption_debug(format, ...) \ + do { \ + if (__builtin_expect(hfs_corruption_panics, false)) { \ + panic(format, ## __VA_ARGS__); \ + __builtin_unreachable(); \ + } \ + else \ + printf(format, ## __VA_ARGS__); \ + } while (0) + +__END_DECLS + +#endif // HFS_DBG_H_ diff --git a/core/hfs_endian.c b/core/hfs_endian.c new file mode 100644 index 0000000..bdc9c7a --- /dev/null +++ b/core/hfs_endian.c @@ -0,0 +1,1227 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * hfs_endian.c + * + * This file implements endian swapping routines for the HFS/HFS Plus + * volume format. + */ + +#include "hfs_endian.h" +#include "hfs_dbg.h" +#include "BTreesPrivate.h" + +#undef ENDIAN_DEBUG + +/* + * Internal swapping routines + * + * These routines handle swapping the records of leaf and index nodes. The + * layout of the keys and records varies depending on the kind of B-tree + * (determined by fileID). + * + * The direction parameter must be kSwapBTNodeBigToHost or kSwapBTNodeHostToBig. + * The kSwapBTNodeHeaderRecordOnly "direction" is not valid for these routines. + */ +int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); +void hfs_swap_HFSPlusForkData (HFSPlusForkData *src); + +#if CONFIG_HFS_STD +int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); +#endif + +/* + * hfs_swap_HFSPlusForkData + */ +void +hfs_swap_HFSPlusForkData ( + HFSPlusForkData *src +) +{ + int i; + + src->logicalSize = SWAP_BE64 (src->logicalSize); + + src->clumpSize = SWAP_BE32 (src->clumpSize); + src->totalBlocks = SWAP_BE32 (src->totalBlocks); + + for (i = 0; i < kHFSPlusExtentDensity; i++) { + src->extents[i].startBlock = SWAP_BE32 (src->extents[i].startBlock); + src->extents[i].blockCount = SWAP_BE32 (src->extents[i].blockCount); + } +} + +/* + * hfs_swap_BTNode + * + * NOTE: This operation is not naturally symmetric. + * We have to determine which way we're swapping things. + */ +int +hfs_swap_BTNode ( + BlockDescriptor *src, + vnode_t vp, + enum HFSBTSwapDirection direction, + u_int8_t allow_empty_node +) +{ + BTNodeDescriptor *srcDesc = src->buffer; + u_int16_t *srcOffs = NULL; + BTreeControlBlockPtr btcb = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr; + u_int16_t i; /* index to match srcDesc->numRecords */ + int error = 0; + +#ifdef ENDIAN_DEBUG + if (direction == kSwapBTNodeBigToHost) { + printf ("hfs: BE -> Native Swap\n"); + } else if (direction == kSwapBTNodeHostToBig) { + printf ("hfs: Native -> BE Swap\n"); + } else if (direction == kSwapBTNodeHeaderRecordOnly) { + printf ("hfs: Not swapping descriptors\n"); + } else { + panic ("hfs_swap_BTNode: This is impossible"); + } +#endif + + /* + * If we are doing a swap from on-disk to in-memory, then swap the node + * descriptor and record offsets before we need to use them. + */ + if (direction == kSwapBTNodeBigToHost) { + srcDesc->fLink = SWAP_BE32 (srcDesc->fLink); + srcDesc->bLink = SWAP_BE32 (srcDesc->bLink); + + /* + * When first opening a BTree, we have to read the header node before the + * control block is initialized. In this case, totalNodes will be zero, + * so skip the bounds checking. Also, we should ignore the header node when + * checking for invalid forwards and backwards links, since the header node's + * links can point back to itself legitimately. 
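+		 *
+		 * (That is why all of the fLink/bLink sanity checks below are
+		 * wrapped in the totalNodes != 0 test.)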
+ */ + if (btcb->totalNodes != 0) { + if (srcDesc->fLink >= btcb->totalNodes) { + hfs_corruption_debug("hfs_swap_BTNode: invalid forward link (0x%08x >= 0x%08x)\n", srcDesc->fLink, btcb->totalNodes); + error = fsBTInvalidHeaderErr; + goto fail; + } + if (srcDesc->bLink >= btcb->totalNodes) { + hfs_corruption_debug("hfs_swap_BTNode: invalid backward link (0x%08x >= 0x%08x)\n", srcDesc->bLink, btcb->totalNodes); + error = fsBTInvalidHeaderErr; + goto fail; + } + + if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) { + hfs_corruption_debug("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n", srcDesc->fLink, (u_int32_t) src->blockNum); + error = fsBTInvalidHeaderErr; + goto fail; + } + if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) { + hfs_corruption_debug("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n", srcDesc->bLink, (u_int32_t) src->blockNum); + error = fsBTInvalidHeaderErr; + goto fail; + } + + + } + + /* + * Check srcDesc->kind. Don't swap it because it's only one byte. + */ + if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) { + printf("hfs_swap_BTNode: invalid node kind (%d)\n", srcDesc->kind); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Check srcDesc->height. Don't swap it because it's only one byte. + */ + if (srcDesc->height > kMaxTreeDepth) { + printf("hfs_swap_BTNode: invalid node height (%d)\n", srcDesc->height); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* Don't swap srcDesc->reserved */ + + srcDesc->numRecords = SWAP_BE16 (srcDesc->numRecords); + + /* + * Swap the node offsets (including the free space one!). + */ + srcOffs = (u_int16_t *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (u_int16_t)))); + + /* + * Sanity check that the record offsets are within the node itself. + */ + if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) || + (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) { + printf("hfs_swap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Swap and sanity check each of the record offsets. + */ + for (i = 0; i <= srcDesc->numRecords; i++) { + srcOffs[i] = SWAP_BE16 (srcOffs[i]); + + /* + * Sanity check: must be even, and within the node itself. + * + * We may be called to swap an unused node, which contains all zeroes. + * Unused nodes are expected only when allow_empty_node is true. + * If it is false and record offset is zero, return error. + */ + if ((srcOffs[i] & 1) || ( + (allow_empty_node == false) && (srcOffs[i] == 0)) || + (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || + (srcOffs[i] > (src->blockSize - 2 * (srcDesc->numRecords + 1)))) { + printf("hfs_swap_BTNode: offset #%d invalid (0x%04X) (blockSize 0x%x numRecords %d)\n", + i, srcOffs[i], src->blockSize, srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Make sure the offsets are strictly increasing. Note that we're looping over + * them backwards, hence the order in the comparison. 
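+			 *
+			 * (In memory srcOffs[0] is the free-space offset and
+			 * srcOffs[numRecords] is the offset of record 0, so the values
+			 * must strictly decrease as i increases.)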
+ */ + if ((i != 0) && (srcOffs[i] >= srcOffs[i-1])) { + printf("hfs_swap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n", + i, i-1, srcOffs[i], srcOffs[i-1]); + error = fsBTInvalidHeaderErr; + goto fail; + } + } + } + + /* + * Swap the records (ordered by frequency of access) + */ + if ((srcDesc->kind == kBTIndexNode) || + (srcDesc-> kind == kBTLeafNode)) { + + if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { + error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction); + } +#if CONFIG_HFS_STD + else { + error = hfs_swap_HFSBTInternalNode (src, VTOC(vp)->c_fileid, direction); + } +#endif + + if (error) goto fail; + + } else if (srcDesc-> kind == kBTMapNode) { + /* Don't swap the bitmaps, they'll be done in the bitmap routines */ + + } else if (srcDesc-> kind == kBTHeaderNode) { + /* The header's offset is hard-wired because we cannot trust the offset pointers. */ + BTHeaderRec *srcHead = (BTHeaderRec *)((char *)src->buffer + sizeof(BTNodeDescriptor)); + + srcHead->treeDepth = SWAP_BE16 (srcHead->treeDepth); + + srcHead->rootNode = SWAP_BE32 (srcHead->rootNode); + srcHead->leafRecords = SWAP_BE32 (srcHead->leafRecords); + srcHead->firstLeafNode = SWAP_BE32 (srcHead->firstLeafNode); + srcHead->lastLeafNode = SWAP_BE32 (srcHead->lastLeafNode); + + srcHead->nodeSize = SWAP_BE16 (srcHead->nodeSize); + srcHead->maxKeyLength = SWAP_BE16 (srcHead->maxKeyLength); + + srcHead->totalNodes = SWAP_BE32 (srcHead->totalNodes); + srcHead->freeNodes = SWAP_BE32 (srcHead->freeNodes); + + srcHead->clumpSize = SWAP_BE32 (srcHead->clumpSize); + srcHead->attributes = SWAP_BE32 (srcHead->attributes); + + /* Don't swap srcHead->reserved1 */ + /* Don't swap srcHead->btreeType; it's only one byte */ + /* Don't swap srcHead->reserved2 */ + /* Don't swap srcHead->reserved3 */ + /* Don't swap bitmap */ + } + + /* + * If we are doing a swap from in-memory to on-disk, then swap the node + * descriptor and record offsets after we're done using them. + */ + if (direction == kSwapBTNodeHostToBig) { + /* + * Sanity check and swap the forward and backward links. + * Ignore the header node since its forward and backwards links can legitimately + * point to itself. + */ + if (srcDesc->fLink >= btcb->totalNodes) { + panic("hfs_UNswap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink); + error = fsBTInvalidHeaderErr; + goto fail; + } + if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) { + panic ("hfs_UNswap_BTNode: invalid forward link (0x%08x == 0x%08x)\n", + srcDesc->fLink, (u_int32_t) src->blockNum); + error = fsBTInvalidHeaderErr; + goto fail; + } + + if (srcDesc->bLink >= btcb->totalNodes) { + panic("hfs_UNswap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink); + error = fsBTInvalidHeaderErr; + goto fail; + } + if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) { + panic ("hfs_UNswap_BTNode: invalid backward link (0x%08x == 0x%08x)\n", + srcDesc->bLink, (u_int32_t) src->blockNum); + error = fsBTInvalidHeaderErr; + goto fail; + } + + + srcDesc->fLink = SWAP_BE32 (srcDesc->fLink); + srcDesc->bLink = SWAP_BE32 (srcDesc->bLink); + + /* + * Check srcDesc->kind. Don't swap it because it's only one byte. + */ + if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) { + panic("hfs_UNswap_BTNode: invalid node kind (%d)\n", srcDesc->kind); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Check srcDesc->height. Don't swap it because it's only one byte. 
+ */ + if (srcDesc->height > kMaxTreeDepth) { + panic("hfs_UNswap_BTNode: invalid node height (%d)\n", srcDesc->height); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* Don't swap srcDesc->reserved */ + + /* + * Swap the node offsets (including the free space one!). + */ + srcOffs = (u_int16_t *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (u_int16_t)))); + + /* + * Sanity check that the record offsets are within the node itself. + */ + if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) || + (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) { + panic("hfs_UNswap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Swap and sanity check each of the record offsets. + */ + for (i = 0; i <= srcDesc->numRecords; i++) { + /* + * Sanity check: must be even, and within the node itself. + * + * We may be called to swap an unused node, which contains all zeroes. + * This can happen when the last record from a node gets deleted. + * This is why we allow the record offset to be zero. + * Unused nodes are expected only when allow_empty_node is true + * (the caller should set it to true for kSwapBTNodeBigToHost). + */ + if ((srcOffs[i] & 1) || + ((allow_empty_node == false) && (srcOffs[i] == 0)) || + (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || + (srcOffs[i] > (src->blockSize - 2 * (srcDesc->numRecords + 1)))) { + panic("hfs_UNswap_BTNode: offset #%d invalid (0x%04X) (blockSize 0x%x numRecords %d)\n", + i, srcOffs[i], src->blockSize, srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Make sure the offsets are strictly increasing. Note that we're looping over + * them backwards, hence the order in the comparison. + */ + if ((i < srcDesc->numRecords) && (srcOffs[i+1] >= srcOffs[i])) { + panic("hfs_UNswap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n", + i+1, i, srcOffs[i+1], srcOffs[i]); + error = fsBTInvalidHeaderErr; + goto fail; + } + + srcOffs[i] = SWAP_BE16 (srcOffs[i]); + } + + srcDesc->numRecords = SWAP_BE16 (srcDesc->numRecords); + } + +fail: + if (error) { + /* + * Log some useful information about where the corrupt node is. + */ + printf("hfs: node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid, + VTOVCB(vp)->vcbVN, vfs_statfs(vnode_mount(vp))->f_mntfromname); + hfs_mark_inconsistent(VTOVCB(vp), HFS_INCONSISTENCY_DETECTED); + } + + return (error); +} + +int +hfs_swap_HFSPlusBTInternalNode ( + BlockDescriptor *src, + HFSCatalogNodeID fileID, + enum HFSBTSwapDirection direction +) +{ + BTNodeDescriptor *srcDesc = src->buffer; + u_int16_t *srcOffs = (u_int16_t *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (u_int16_t)))); + char *nextRecord; /* Points to start of record following current one */ + + /* + * i is an int32 because it needs to be negative to index the offset to free space. + * srcDesc->numRecords is a u_int16_t and is unlikely to become 32-bit so this should be ok. + */ + + int32_t i; + u_int32_t j; + + if (fileID == kHFSExtentsFileID) { + HFSPlusExtentKey *srcKey; + HFSPlusExtentDescriptor *srcRec; + size_t recordSize; /* Size of the data part of the record, or node number for index nodes */ + + if (srcDesc->kind == kBTIndexNode) + recordSize = sizeof(u_int32_t); + else + recordSize = sizeof(HFSPlusExtentDescriptor); + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. 
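+			 * (Note that this loop visits records in reverse: i == 0 is the
+			 * last record in the node, and srcOffs[i-1] is the offset of the
+			 * record that follows it; for i == 0 the negative index lands on
+			 * the free space offset.)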
*/ + srcKey = (HFSPlusExtentKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure the key and data are within the buffer. Since both key + * and data are fixed size, this is relatively easy. Note that this + * relies on the keyLength being a constant; we verify the keyLength + * below. + */ + if ((char *)srcKey + sizeof(HFSPlusExtentKey) + recordSize > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } + return fsBTInvalidNodeErr; + } + srcRec = (HFSPlusExtentDescriptor *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength)); + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + + /* Don't swap srcKey->forkType; it's only one byte */ + /* Don't swap srcKey->pad */ + + srcKey->fileID = SWAP_BE32 (srcKey->fileID); + srcKey->startBlock = SWAP_BE32 (srcKey->startBlock); + + if (srcDesc->kind == kBTIndexNode) { + /* For index nodes, the record data is just a child node number. */ + *((u_int32_t *)srcRec) = SWAP_BE32 (*((u_int32_t *)srcRec)); + } else { + /* Swap the extent data */ + for (j = 0; j < kHFSPlusExtentDensity; j++) { + srcRec[j].startBlock = SWAP_BE32 (srcRec[j].startBlock); + srcRec[j].blockCount = SWAP_BE32 (srcRec[j].blockCount); + } + } + } + + } else if (fileID == kHFSCatalogFileID) { + HFSPlusCatalogKey *srcKey; + int16_t *srcPtr; + u_int16_t keyLength; + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ + srcKey = (HFSPlusCatalogKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + (uintptr_t)(srcOffs[i-1]); + + /* + * Make sure we can safely dereference the keyLength and parentID fields. 
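+			 * (The offsetof(HFSPlusCatalogKey, nodeName.unicode[0]) bound
+			 * used below covers keyLength, parentID and the nodeName length
+			 * field.)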
+ */ + if ((char *)srcKey + offsetof(HFSPlusCatalogKey, nodeName.unicode[0]) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + /* + * Swap and sanity check the key length + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + keyLength = srcKey->keyLength; /* Put it in a local (native order) because we use it several times */ + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16 (keyLength); + + /* Sanity check the key length */ + if (keyLength < kHFSPlusCatalogKeyMinimumLength || keyLength > kHFSPlusCatalogKeyMaximumLength) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, keyLength); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, keyLength); + } + return fsBTInvalidNodeErr; + } + + /* + * Make sure that we can safely dereference the record's type field or + * an index node's child node number. + */ + srcPtr = (int16_t *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength)); + if ((char *)srcPtr + sizeof(u_int32_t) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + srcKey->parentID = SWAP_BE32 (srcKey->parentID); + + /* + * Swap and sanity check the key's node name + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); + /* Make sure name length is consistent with key length */ + if (keyLength < sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) + + srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog record #%d keyLength=%d expected=%lu\n", + srcDesc->numRecords-i, keyLength, sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) + + srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog record #%d keyLength=%d expected=%lu\n", + srcDesc->numRecords-i, keyLength, sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) + + srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])); + } + return fsBTInvalidNodeErr; + } + for (j = 0; j < srcKey->nodeName.length; j++) { + srcKey->nodeName.unicode[j] = SWAP_BE16 (srcKey->nodeName.unicode[j]); + } + if (direction == kSwapBTNodeHostToBig) + srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); + + /* + * For index nodes, the record data is just the child's node number. + * Skip over swapping the various types of catalog record. + */ + if (srcDesc->kind == kBTIndexNode) { + *((u_int32_t *)srcPtr) = SWAP_BE32 (*((u_int32_t *)srcPtr)); + continue; + } + + /* Make sure the recordType is in native order before using it. 
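+			 * (For host-to-big swaps it is still in native order at this
+			 * point; it is swapped back to big endian at the bottom of the
+			 * loop, once the per-type fields have been handled.)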
*/ + if (direction == kSwapBTNodeBigToHost) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); + + if (srcPtr[0] == kHFSPlusFolderRecord) { + HFSPlusCatalogFolder *srcRec = (HFSPlusCatalogFolder *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + srcRec->flags = SWAP_BE16 (srcRec->flags); + srcRec->valence = SWAP_BE32 (srcRec->valence); + srcRec->folderID = SWAP_BE32 (srcRec->folderID); + srcRec->createDate = SWAP_BE32 (srcRec->createDate); + srcRec->contentModDate = SWAP_BE32 (srcRec->contentModDate); + srcRec->attributeModDate = SWAP_BE32 (srcRec->attributeModDate); + srcRec->accessDate = SWAP_BE32 (srcRec->accessDate); + srcRec->backupDate = SWAP_BE32 (srcRec->backupDate); + + srcRec->bsdInfo.ownerID = SWAP_BE32 (srcRec->bsdInfo.ownerID); + srcRec->bsdInfo.groupID = SWAP_BE32 (srcRec->bsdInfo.groupID); + + /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */ + /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */ + + srcRec->bsdInfo.fileMode = SWAP_BE16 (srcRec->bsdInfo.fileMode); + srcRec->bsdInfo.special.iNodeNum = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum); + + srcRec->textEncoding = SWAP_BE32 (srcRec->textEncoding); + + /* Don't swap srcRec->userInfo */ + /* Don't swap srcRec->finderInfo */ + srcRec->folderCount = SWAP_BE32 (srcRec->folderCount); + + } else if (srcPtr[0] == kHFSPlusFileRecord) { + HFSPlusCatalogFile *srcRec = (HFSPlusCatalogFile *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + srcRec->flags = SWAP_BE16 (srcRec->flags); + + srcRec->fileID = SWAP_BE32 (srcRec->fileID); + + srcRec->createDate = SWAP_BE32 (srcRec->createDate); + srcRec->contentModDate = SWAP_BE32 (srcRec->contentModDate); + srcRec->attributeModDate = SWAP_BE32 (srcRec->attributeModDate); + srcRec->accessDate = SWAP_BE32 (srcRec->accessDate); + srcRec->backupDate = SWAP_BE32 (srcRec->backupDate); + + srcRec->bsdInfo.ownerID = SWAP_BE32 (srcRec->bsdInfo.ownerID); + srcRec->bsdInfo.groupID = SWAP_BE32 (srcRec->bsdInfo.groupID); + + /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */ + /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */ + + srcRec->bsdInfo.fileMode = SWAP_BE16 (srcRec->bsdInfo.fileMode); + srcRec->bsdInfo.special.iNodeNum = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum); + + srcRec->textEncoding = SWAP_BE32 (srcRec->textEncoding); + + /* If kHFSHasLinkChainBit is set, reserved1 is hl_FirstLinkID. + * In all other context, it is expected to be zero. + */ + srcRec->reserved1 = SWAP_BE32 (srcRec->reserved1); + + /* Don't swap srcRec->userInfo */ + /* Don't swap srcRec->finderInfo */ + /* Don't swap srcRec->reserved2 */ + + hfs_swap_HFSPlusForkData (&srcRec->dataFork); + hfs_swap_HFSPlusForkData (&srcRec->resourceFork); + + } else if ((srcPtr[0] == kHFSPlusFolderThreadRecord) || + (srcPtr[0] == kHFSPlusFileThreadRecord)) { + + /* + * Make sure there is room for parentID and name length. 
+ */ + HFSPlusCatalogThread *srcRec = (HFSPlusCatalogThread *)srcPtr; + if ((char *) &srcRec->nodeName.unicode[0] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcRec->reserved */ + + srcRec->parentID = SWAP_BE32 (srcRec->parentID); + + if (direction == kSwapBTNodeBigToHost) + srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); + + /* + * Make sure there is room for the name in the buffer. + * Then swap the characters of the name itself. + */ + if ((char *) &srcRec->nodeName.unicode[srcRec->nodeName.length] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + for (j = 0; j < srcRec->nodeName.length; j++) { + srcRec->nodeName.unicode[j] = SWAP_BE16 (srcRec->nodeName.unicode[j]); + } + + if (direction == kSwapBTNodeHostToBig) + srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); + + } else { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* We can swap the record type now that we're done using it. */ + if (direction == kSwapBTNodeHostToBig) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); + } + + } else if (fileID == kHFSAttributesFileID) { + HFSPlusAttrKey *srcKey; + HFSPlusAttrRecord *srcRec; + u_int16_t keyLength; + u_int32_t attrSize = 0; + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ + srcKey = (HFSPlusAttrKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* Make sure there is room in the buffer for a minimal key */ + if ((char *) &srcKey->attrName[1] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + /* Swap the key length field */ + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + keyLength = srcKey->keyLength; /* Keep a copy in native order */ + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + + /* + * Make sure that we can safely dereference the record's type field or + * an index node's child node number. 
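+			 * (Both are u_int32_t, hence the sizeof(u_int32_t) bound below.)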
+ */ + srcRec = (HFSPlusAttrRecord *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength)); + if ((char *)srcRec + sizeof(u_int32_t) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr key #%d too big (%d)\n", srcDesc->numRecords-i-1, keyLength); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d too big (%d)\n", srcDesc->numRecords-i-1, keyLength); + } + return fsBTInvalidNodeErr; + } + + srcKey->fileID = SWAP_BE32(srcKey->fileID); + srcKey->startBlock = SWAP_BE32(srcKey->startBlock); + + /* + * Swap and check the attribute name + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + /* Sanity check the attribute name length */ + if (srcKey->attrNameLen > kHFSMaxAttrNameLen || keyLength < (kHFSPlusAttrKeyMinimumLength + sizeof(u_int16_t)*srcKey->attrNameLen)) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr key #%d keyLength=%d attrNameLen=%d\n", srcDesc->numRecords-i-1, keyLength, srcKey->attrNameLen); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d keyLength=%d attrNameLen=%d\n", srcDesc->numRecords-i-1, keyLength, srcKey->attrNameLen); + } + return fsBTInvalidNodeErr; + } + for (j = 0; j < srcKey->attrNameLen; j++) + srcKey->attrName[j] = SWAP_BE16(srcKey->attrName[j]); + if (direction == kSwapBTNodeHostToBig) + srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + + /* + * For index nodes, the record data is just the child's node number. + * Skip over swapping the various types of attribute record. + */ + if (srcDesc->kind == kBTIndexNode) { + *((u_int32_t *)srcRec) = SWAP_BE32 (*((u_int32_t *)srcRec)); + continue; + } + + /* Swap the record data */ + if (direction == kSwapBTNodeBigToHost) + srcRec->recordType = SWAP_BE32(srcRec->recordType); + switch (srcRec->recordType) { + case kHFSPlusAttrInlineData: + /* Is there room for the inline data header? */ + if ((char *) &srcRec->attrData.attrData[0] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* We're not swapping the reserved fields */ + + /* Swap the attribute size */ + if (direction == kSwapBTNodeHostToBig) + attrSize = srcRec->attrData.attrSize; + srcRec->attrData.attrSize = SWAP_BE32(srcRec->attrData.attrSize); + if (direction == kSwapBTNodeBigToHost) + attrSize = srcRec->attrData.attrSize; + + /* Is there room for the inline attribute data? */ + if ((char *) &srcRec->attrData.attrData[attrSize] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big (attrSize=%u)\n", srcDesc->numRecords-i-1, attrSize); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big (attrSize=%u)\n", srcDesc->numRecords-i-1, attrSize); + } + return fsBTInvalidNodeErr; + } + + /* Not swapping the attribute data itself */ + break; + + case kHFSPlusAttrForkData: + /* Is there room for the fork data record? 
*/ + if ((char *)srcRec + sizeof(HFSPlusAttrForkData) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr fork data #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr fork data #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* We're not swapping the reserved field */ + + hfs_swap_HFSPlusForkData(&srcRec->forkData.theFork); + break; + + case kHFSPlusAttrExtents: + /* Is there room for an extent record? */ + if ((char *)srcRec + sizeof(HFSPlusAttrExtents) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: attr extents #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: attr extents #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* We're not swapping the reserved field */ + + for (j = 0; j < kHFSPlusExtentDensity; j++) { + srcRec->overflowExtents.extents[j].startBlock = + SWAP_BE32(srcRec->overflowExtents.extents[j].startBlock); + srcRec->overflowExtents.extents[j].blockCount = + SWAP_BE32(srcRec->overflowExtents.extents[j].blockCount); + } + break; + } + if (direction == kSwapBTNodeHostToBig) + srcRec->recordType = SWAP_BE32(srcRec->recordType); + } + } else if (fileID > kHFSFirstUserCatalogNodeID) { + /* The only B-tree with a non-system CNID that we use is the hotfile B-tree */ + HotFileKey *srcKey; + u_int32_t *srcRec; + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ + srcKey = (HotFileKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. 
+ */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* Make sure there is room for the key (HotFileKey) and data (u_int32_t) */ + if ((char *)srcKey + sizeof(HotFileKey) + sizeof(u_int32_t) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: hotfile #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + /* Swap and sanity check the key length field */ + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSPlusBTInternalNode: hotfile #%d incorrect keyLength %d\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } else { + printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d incorrect keyLength %d\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } + return fsBTInvalidNodeErr; + } + srcRec = (u_int32_t *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength)); + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + + /* Don't swap srcKey->forkType */ + /* Don't swap srcKey->pad */ + + srcKey->temperature = SWAP_BE32 (srcKey->temperature); + srcKey->fileID = SWAP_BE32 (srcKey->fileID); + + *((u_int32_t *)srcRec) = SWAP_BE32 (*((u_int32_t *)srcRec)); + } + } else { + panic ("hfs_swap_HFSPlusBTInternalNode: fileID %u is not a system B-tree\n", fileID); + } + + + return (0); +} + +#if CONFIG_HFS_STD +int +hfs_swap_HFSBTInternalNode ( + BlockDescriptor *src, + HFSCatalogNodeID fileID, + enum HFSBTSwapDirection direction +) +{ + BTNodeDescriptor *srcDesc = src->buffer; + u_int16_t *srcOffs = (u_int16_t *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (u_int16_t)))); + char *nextRecord; /* Points to start of record following current one */ + + /* + * i is an int32 because it needs to be negative to index the offset to free space. + * srcDesc->numRecords is a u_int16_t and is unlikely to become 32-bit so this should be ok. + */ + int32_t i; + u_int32_t j; + + if (fileID == kHFSExtentsFileID) { + HFSExtentKey *srcKey; + HFSExtentDescriptor *srcRec; + size_t recordSize; /* Size of the data part of the record, or node number for index nodes */ + + if (srcDesc->kind == kBTIndexNode) + recordSize = sizeof(u_int32_t); + else + recordSize = sizeof(HFSExtentDescriptor); + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ + srcKey = (HFSExtentKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure the key and data are within the buffer. Since both key + * and data are fixed size, this is relatively easy. Note that this + * relies on the keyLength being a constant; we verify the keyLength + * below. 
+ */ + if ((char *)srcKey + sizeof(HFSExtentKey) + recordSize > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */ + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } else { + printf("hfs_swap_HFSBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->forkType; it's only one byte */ + + srcKey->fileID = SWAP_BE32 (srcKey->fileID); + srcKey->startBlock = SWAP_BE16 (srcKey->startBlock); + + /* Point to record data (round up to even byte boundary) */ + srcRec = (HFSExtentDescriptor *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1)); + + if (srcDesc->kind == kBTIndexNode) { + /* For index nodes, the record data is just a child node number. */ + *((u_int32_t *)srcRec) = SWAP_BE32 (*((u_int32_t *)srcRec)); + } else { + /* Swap the extent data */ + for (j = 0; j < kHFSExtentDensity; j++) { + srcRec[j].startBlock = SWAP_BE16 (srcRec[j].startBlock); + srcRec[j].blockCount = SWAP_BE16 (srcRec[j].blockCount); + } + } + } + + } else if (fileID == kHFSCatalogFileID) { + HFSCatalogKey *srcKey; + int16_t *srcPtr; + unsigned expectedKeyLength; + + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ + srcKey = (HFSCatalogKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure we can safely dereference the keyLength and parentID fields. + * The value 8 below is 1 bytes for keyLength + 1 byte reserved + 4 bytes + * for parentID + 1 byte for nodeName's length + 1 byte to round up the + * record start to an even offset, which forms a minimal key. 
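+			 * (That is, 1 + 1 + 4 + 1 + 1 = 8 bytes, matching the bound below.)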
+ */ + if ((char *)srcKey + 8 > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */ + if (srcKey->keyLength < kHFSCatalogKeyMinimumLength || srcKey->keyLength > kHFSCatalogKeyMaximumLength) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->reserved */ + + srcKey->parentID = SWAP_BE32 (srcKey->parentID); + + /* Don't swap srcKey->nodeName */ + + /* Make sure the keyLength is big enough for the key's content */ + if (srcDesc->kind == kBTIndexNode) + expectedKeyLength = sizeof(*srcKey) - sizeof(srcKey->keyLength); + else + expectedKeyLength = srcKey->nodeName[0] + kHFSCatalogKeyMinimumLength; + if (srcKey->keyLength < expectedKeyLength) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog record #%d keyLength=%u expected=%u\n", + srcDesc->numRecords-i, srcKey->keyLength, expectedKeyLength); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog record #%d keyLength=%u expected=%u\n", + srcDesc->numRecords-i, srcKey->keyLength, expectedKeyLength); + } + return fsBTInvalidNodeErr; + } + + /* Point to record data (round up to even byte boundary) */ + srcPtr = (int16_t *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1)); + + /* + * Make sure that we can safely dereference the record's type field or + * and index node's child node number. + */ + if ((char *)srcPtr + sizeof(u_int32_t) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* + * For index nodes, the record data is just the child's node number. + * Skip over swapping the various types of catalog record. + */ + if (srcDesc->kind == kBTIndexNode) { + *((u_int32_t *)srcPtr) = SWAP_BE32 (*((u_int32_t *)srcPtr)); + continue; + } + + /* Make sure the recordType is in native order before using it. 
*/ + if (direction == kSwapBTNodeBigToHost) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); + + if (srcPtr[0] == kHFSFolderRecord) { + HFSCatalogFolder *srcRec = (HFSCatalogFolder *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + srcRec->flags = SWAP_BE16 (srcRec->flags); + srcRec->valence = SWAP_BE16 (srcRec->valence); + + srcRec->folderID = SWAP_BE32 (srcRec->folderID); + srcRec->createDate = SWAP_BE32 (srcRec->createDate); + srcRec->modifyDate = SWAP_BE32 (srcRec->modifyDate); + srcRec->backupDate = SWAP_BE32 (srcRec->backupDate); + + /* Don't swap srcRec->userInfo */ + /* Don't swap srcRec->finderInfo */ + /* Don't swap resserved array */ + + } else if (srcPtr[0] == kHFSFileRecord) { + HFSCatalogFile *srcRec = (HFSCatalogFile *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + srcRec->flags = srcRec->flags; + srcRec->fileType = srcRec->fileType; + + /* Don't swap srcRec->userInfo */ + + srcRec->fileID = SWAP_BE32 (srcRec->fileID); + + srcRec->dataStartBlock = SWAP_BE16 (srcRec->dataStartBlock); + srcRec->dataLogicalSize = SWAP_BE32 (srcRec->dataLogicalSize); + srcRec->dataPhysicalSize = SWAP_BE32 (srcRec->dataPhysicalSize); + + srcRec->rsrcStartBlock = SWAP_BE16 (srcRec->rsrcStartBlock); + srcRec->rsrcLogicalSize = SWAP_BE32 (srcRec->rsrcLogicalSize); + srcRec->rsrcPhysicalSize = SWAP_BE32 (srcRec->rsrcPhysicalSize); + + srcRec->createDate = SWAP_BE32 (srcRec->createDate); + srcRec->modifyDate = SWAP_BE32 (srcRec->modifyDate); + srcRec->backupDate = SWAP_BE32 (srcRec->backupDate); + + /* Don't swap srcRec->finderInfo */ + + srcRec->clumpSize = SWAP_BE16 (srcRec->clumpSize); + + /* Swap the two sets of extents as an array of six (three each) u_int16_t */ + for (j = 0; j < kHFSExtentDensity * 2; j++) { + srcRec->dataExtents[j].startBlock = SWAP_BE16 (srcRec->dataExtents[j].startBlock); + srcRec->dataExtents[j].blockCount = SWAP_BE16 (srcRec->dataExtents[j].blockCount); + } + + /* Don't swap srcRec->reserved */ + + } else if ((srcPtr[0] == kHFSFolderThreadRecord) || + (srcPtr[0] == kHFSFileThreadRecord)) { + HFSCatalogThread *srcRec = (HFSCatalogThread *)srcPtr; + + /* Make sure there is room for parentID and name length */ + if ((char *) &srcRec->nodeName[1] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* Don't swap srcRec->reserved array */ + + srcRec->parentID = SWAP_BE32 (srcRec->parentID); + + /* Don't swap srcRec->nodeName */ + + /* Make sure there is room for the name in the buffer */ + if ((char *) &srcRec->nodeName[srcRec->nodeName[0]] > nextRecord) { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + } else { + 
printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + } else { + if (direction == kSwapBTNodeHostToBig) { + panic("hfs_swap_HFSBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + } else { + printf("hfs_swap_HFSBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + } + return fsBTInvalidNodeErr; + } + + /* We can swap the record type now that we're done using it */ + if (direction == kSwapBTNodeHostToBig) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); + } + + } else { + panic ("hfs_swap_HFSBTInternalNode: fileID %u is not a system B-tree\n", fileID); + } + + return (0); +} +#endif + diff --git a/core/hfs_endian.h b/core/hfs_endian.h new file mode 100644 index 0000000..a916319 --- /dev/null +++ b/core/hfs_endian.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000, 2002-2003, 2005-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef __HFS_ENDIAN_H__ +#define __HFS_ENDIAN_H__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +/* + * hfs_endian.h + * + * This file prototypes endian swapping routines for the HFS/HFS Plus + * volume format. + */ +#include "hfs.h" +#include "BTreesInternal.h" +#include + +/*********************/ +/* BIG ENDIAN Macros */ +/*********************/ +#define SWAP_BE16(__a) OSSwapBigToHostInt16 (__a) +#define SWAP_BE32(__a) OSSwapBigToHostInt32 (__a) +#define SWAP_BE64(__a) OSSwapBigToHostInt64 (__a) + +#if BYTE_ORDER == BIG_ENDIAN + + /* HFS is always big endian, no swapping needed */ + #define SWAP_HFS_PLUS_FORK_DATA(__a) + +/************************/ +/* LITTLE ENDIAN Macros */ +/************************/ +#elif BYTE_ORDER == LITTLE_ENDIAN + + #define SWAP_HFS_PLUS_FORK_DATA(__a) hfs_swap_HFSPlusForkData ((__a)) + +#else +#warning Unknown byte order +#error +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Constants for the "unswap" argument to hfs_swap_BTNode: + */ +enum HFSBTSwapDirection { + kSwapBTNodeBigToHost = 0, + kSwapBTNodeHostToBig = 1, + + /* + * kSwapBTNodeHeaderRecordOnly is used to swap just the header record + * of a header node from big endian (on disk) to host endian (in memory). 
+ * It does not swap the node descriptor (forward/backward links, record + * count, etc.). It assumes the header record is at offset 0x000E. + * + * Since HFS Plus doesn't have fixed B-tree node sizes, we have to read + * the header record to determine the actual node size for that tree + * before we can set up the B-tree control block. We read it initially + * as 512 bytes, then re-read it once we know the correct node size. Since + * we may not have read the entire header node the first time, we can't + * swap the record offsets, other records, or do most sanity checks. + */ + kSwapBTNodeHeaderRecordOnly = 3 +}; + +int hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction, + u_int8_t allow_empty_node); + +#ifdef __cplusplus +} +#endif + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __HFS_FORMAT__ */ diff --git a/core/hfs_extents.c b/core/hfs_extents.c new file mode 100644 index 0000000..ce4154d --- /dev/null +++ b/core/hfs_extents.c @@ -0,0 +1,771 @@ +/* + * Copyright (c) 2014-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#if HFS_EXTENTS_TEST + +#include "../tests/hfs_extents_test.h" +#include "hfs_extents.h" + +#else + +#include "hfs_extents.h" + +// In this file, group refers to a set of 8 extents + +static uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *ext, int count); +static errno_t hfs_ext_iter_next_group(struct hfs_ext_iter *iter); +static errno_t hfs_ext_iter_update(struct hfs_ext_iter *iter, + HFSPlusExtentDescriptor *extents, + int count, + HFSPlusExtentRecord cat_extents); +static errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter); + +#endif + +#define CHECK(x, var, goto_label) \ + do { \ + var = (x); \ + if (var) { \ + printf("%s:%u error: %d\n", __func__, __LINE__, var); \ + goto goto_label; \ + } \ + } while (0) + +#define min(a,b) \ + ({ typeof (a) _a = (a); typeof (b) _b = (b); _a < _b ? 
_a : _b; }) + +static __attribute__((pure)) +const HFSPlusExtentKey *hfs_ext_iter_key(const hfs_ext_iter_t *iter) +{ + return (const HFSPlusExtentKey *)&iter->bt_iter.key; +} + +static __attribute__((pure)) +HFSPlusExtentKey *hfs_ext_iter_key_mut(hfs_ext_iter_t *iter) +{ + return (HFSPlusExtentKey *)&iter->bt_iter.key; +} + +// Returns the total number of blocks for the @count extents provided +uint32_t hfs_total_blocks(const HFSPlusExtentDescriptor *extents, int count) +{ + uint32_t block_count = 0; + for (int i = 0; i < count; ++i) + block_count += extents[i].blockCount; + return block_count; +} + +/* + * Checks a group of extents: makes sure that if it's the last group + * for a fork, that all the remaining extents are properly zeroed and + * if it's not then checks that all extents are set. This also sets + * @group_block_count and @last_in_fork. Returns ESTALE if + * inconsistent. + */ +errno_t hfs_ext_iter_check_group(hfs_ext_iter_t *iter) +{ + filefork_t *ff = VTOF(iter->vp); + const HFSPlusExtentKey *key = hfs_ext_iter_key(iter); + uint32_t count = 0; + int i; + + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (!iter->group[i].blockCount) + break; + count += iter->group[i].blockCount; + } + + if (i < kHFSPlusExtentDensity) { + iter->last_in_fork = true; + if (key->startBlock + count != ff_allocblocks(ff)) + goto bad; + + // Check remainder of extents + for (++i; i < kHFSPlusExtentDensity; ++i) { + if (iter->group[i].blockCount) + goto bad; + } + } else { + if (key->startBlock + count > ff_allocblocks(ff)) + goto bad; + + iter->last_in_fork = (key->startBlock + count == ff_allocblocks(ff)); + } + + iter->group_block_count = count; + + return 0; + +bad: + +#if DEBUG + printf("hfs_ext_iter_check_group: bad group; start: %u, total blocks: %u\n", + key->startBlock, ff_allocblocks(ff)); + + for (int j = 0; j < kHFSPlusExtentDensity; ++j) { + printf("%s<%u, %u>", j ? ", " : "", + iter->group[j].startBlock, iter->group[j].blockCount); + } + + printf("\n"); +#endif + + return ESTALE; +} + +// NOTE: doesn't copy group data +static void hfs_ext_iter_copy(const hfs_ext_iter_t *src, hfs_ext_iter_t *dst) +{ + dst->vp = src->vp; + memcpy(&dst->bt_iter.key, &src->bt_iter.key, sizeof(HFSPlusExtentKey)); + + dst->file_block = src->file_block; + dst->ndx = src->ndx; + + dst->bt_iter.hint = src->bt_iter.hint; + dst->bt_iter.version = 0; + dst->bt_iter.reserved = 0; + dst->bt_iter.hitCount = 0; + dst->bt_iter.maxLeafRecs = 0; +} + +bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter) +{ + return hfs_ext_iter_key(iter)->startBlock == 0; +} + +#if !HFS_EXTENTS_TEST + +/* + * Finds the extent for offset. It might be in the catalog or the extents + * file. + */ +errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(vp); + + iter->vp = vp; + + uint32_t end_block, index; + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + + filefork_t *ff = VTOF(vp); + + CHECK(SearchExtentFile(hfsmp, ff, offset, + key, iter->group, &index, + &iter->bt_iter.hint.nodeNum, &end_block), ret, exit); + + iter->ndx = index; + iter->file_block = end_block - iter->group[index].blockCount; + + if (!key->keyLength) { + // We're pointing at the catalog record extents so fix up the key + key->keyLength = kHFSPlusExtentKeyMaximumLength; + key->forkType = (VNODE_IS_RSRC(iter->vp) + ? 
kHFSResourceForkType : kHFSDataForkType); + key->pad = 0; + key->fileID = VTOC(iter->vp)->c_fileid; + key->startBlock = 0; + } + + CHECK(hfs_ext_iter_check_group(iter), ret, exit); + + ret = 0; + +exit: + + return MacToVFSError(ret); +} + +static uint32_t hfs_ext_iter_next_group_block(const hfs_ext_iter_t *iter) +{ + const HFSPlusExtentKey *key = hfs_ext_iter_key(iter); + + return key->startBlock + iter->group_block_count; +} + +/* + * Move the iterator to the next group. Don't call if there's a chance + * there is no entry; the caller should check last_in_fork instead. + */ +static errno_t hfs_ext_iter_next_group(hfs_ext_iter_t *iter) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(iter->vp); + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + const bool catalog_extents = hfs_ext_iter_is_catalog_extents(iter); + const uint32_t next_block = hfs_ext_iter_next_group_block(iter); + + FSBufferDescriptor fbd = { + .bufferAddress = &iter->group, + .itemCount = 1, + .itemSize = sizeof(iter->group) + }; + + if (catalog_extents) { + key->startBlock = next_block; + + CHECK(BTSearchRecord(tree, &iter->bt_iter, &fbd, NULL, + &iter->bt_iter), ret, exit); + } else { + const uint32_t file_id = key->fileID; + const uint8_t fork_type = key->forkType; + + CHECK(BTIterateRecord(tree, kBTreeNextRecord, &iter->bt_iter, + &fbd, NULL), ret, exit); + + if (key->fileID != file_id + || key->forkType != fork_type + || key->startBlock != next_block) { + // This indicates an inconsistency + ret = ESTALE; + goto exit; + } + } + + iter->file_block = key->startBlock; + iter->ndx = 0; + + CHECK(hfs_ext_iter_check_group(iter), ret, exit); + + ret = 0; + +exit: + + return MacToVFSError(ret); +} + +/* + * Updates with the extents provided and sets the key up for the next group. + * It is assumed that any previous record that might collide has been deleted. + * NOTE: @extents must point to a buffer that can be zero padded to multiple + * of 8 extents. + */ +errno_t hfs_ext_iter_update(hfs_ext_iter_t *iter, + HFSPlusExtentDescriptor *extents, + int count, + HFSPlusExtentRecord cat_extents) +{ + errno_t ret; + hfsmount_t *hfsmp = VTOHFS(iter->vp); + cnode_t *cp = VTOC(iter->vp); + HFSPlusExtentKey *key = hfs_ext_iter_key_mut(iter); + int ndx = 0; + + if (!extents) + extents = iter->group; + + if (count % kHFSPlusExtentDensity) { + // Zero out last group + bzero(&extents[count], (kHFSPlusExtentDensity + - (count % 8)) * sizeof(*extents)); + } + + if (hfs_ext_iter_is_catalog_extents(iter)) { + // Caller is responsible for in-memory updates + + if (cat_extents) + hfs_ext_copy_rec(extents, cat_extents); + + struct cat_fork fork; + + hfs_fork_copy(&fork, &VTOF(iter->vp)->ff_data, extents); + hfs_prepare_fork_for_update(VTOF(iter->vp), &fork, &fork, hfsmp->blockSize); + + bool is_rsrc = VNODE_IS_RSRC(iter->vp); + CHECK(cat_update(hfsmp, &cp->c_desc, &cp->c_attr, + is_rsrc ? NULL : &fork, + is_rsrc ? 
&fork : NULL), ret, exit); + + // Set the key to the next group + key->startBlock = hfs_total_blocks(extents, kHFSPlusExtentDensity); + + ndx += 8; + } + + // Deal with the remainder which must be overflow extents + for (; ndx < count; ndx += 8) { + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + + FSBufferDescriptor fbd = { + .bufferAddress = &extents[ndx], + .itemCount = 1, + .itemSize = sizeof(HFSPlusExtentRecord) + }; + + CHECK(BTInsertRecord(tree, &iter->bt_iter, &fbd, + sizeof(HFSPlusExtentRecord)), ret, exit); + + // Set the key to the next group + key->startBlock += hfs_total_blocks(&extents[ndx], kHFSPlusExtentDensity); + } + + ret = 0; + +exit: + + return ret; +} + +#endif // !HFS_EXTENTS_TEST + +static void push_ext(HFSPlusExtentDescriptor *extents, int *count, + const HFSPlusExtentDescriptor *ext) +{ + if (!ext->blockCount) + return; + + if (*count && hfs_ext_end(&extents[*count - 1]) == ext->startBlock) + extents[*count - 1].blockCount += ext->blockCount; + else + extents[(*count)++] = *ext; +} + +/* + * NOTE: Here we rely on the replacement extents not being too big as + * otherwise the number of BTree records that we have to delete could be + * too large. + */ +errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp, + uint32_t file_block, + const HFSPlusExtentDescriptor *repl, + int repl_count, + HFSPlusExtentRecord catalog_extents) +{ + errno_t ret; + filefork_t * const tree = hfsmp->hfs_extents_cp->c_datafork; + hfs_ext_iter_t *iter_in = NULL, *iter_out; + HFSPlusExtentDescriptor *extents = NULL; + int buffered_extents = 0; + const int max_roll_back_extents = 16384; // 128k + HFSPlusExtentDescriptor *roll_back_extents = NULL; + int roll_back_count = 0; + const uint32_t end_file_block = file_block + hfs_total_blocks(repl, repl_count); + filefork_t *ff = VTOF(vp); + uint32_t start_group_block = 0, block = 0; + + // Indicate we haven't touched catalog extents + catalog_extents[0].blockCount = 0; + + if (end_file_block > ff_allocblocks(ff)) + return EINVAL; + + iter_in = hfs_malloc(sizeof(*iter_in) * 2); + iter_out = iter_in + 1; + HFSPlusExtentKey *key_in = hfs_ext_iter_key_mut(iter_in); + + // Get to where we want to start + off_t offset = hfs_blk_to_bytes(file_block, hfsmp->blockSize); + + /* + * If the replacement is at the start of a group, we want to pull in the + * group before so that we tidy up any padding that we might have done + * in a prior hfs_ext_replace call. + */ + if (offset > 0) + --offset; + + CHECK(hfs_ext_find(vp, offset, iter_in), ret, exit); + + start_group_block = key_in->startBlock; + + roll_back_extents = hfs_malloc(max_roll_back_extents + * sizeof(HFSPlusExtentDescriptor)); + + // Move to the first extent in this group + iter_in->ndx = 0; + + hfs_ext_iter_copy(iter_in, iter_out); + + // Create a buffer for our extents + buffered_extents = roundup(3 * kHFSPlusExtentDensity + repl_count, + kHFSPlusExtentDensity); + extents = hfs_malloc(sizeof(*extents) * buffered_extents); + int count = 0; + + /* + * Iterate through the extents that are affected by this replace operation. + * We cannot push more than 16 + repl_count extents here; 8 for the group + * containing the replacement start, repl_count for the replacements and 8 + * for the group containing the end. If we went back a group due to + * decrementing the offset above, it's still the same because we know in + * that case the replacement starts at the beginning of the next group. 
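+	 * (Note that @extents above is sized
+	 * roundup(3 * kHFSPlusExtentDensity + repl_count, kHFSPlusExtentDensity),
+	 * i.e. the 16 + repl_count worst case plus one spare group of headroom.)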
+ */ + block = start_group_block; + for (;;) { + if (!iter_in->ndx) { + hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]); + roll_back_count += kHFSPlusExtentDensity; + + if (!hfs_ext_iter_is_catalog_extents(iter_in)) { + // Delete this extent group; we're going to replace it + CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit); + } + } + + HFSPlusExtentDescriptor *ext = &iter_in->group[iter_in->ndx]; + if (!ext->blockCount) { + /* + * We ran out of existing extents so we just write the + * extents and we're done. + */ + goto finish; + } + + // If the current extent does not overlap replacement... + if (block + ext->blockCount <= file_block || block >= end_file_block) { + // Keep the current extent exactly as it is + push_ext(extents, &count, ext); + } else { + HFSPlusExtentDescriptor dealloc_ext = *ext; + + if (block <= file_block) { + /* + * The middle or tail of the current extent overlaps + * the replacement extents. Keep the non-overlapping + * head of the current extent. + */ + uint32_t trimmed_len = file_block - block; + + if (trimmed_len) { + // Push (keep) non-overlapping head of current extent + push_ext(extents, &count, + &(HFSPlusExtentDescriptor){ ext->startBlock, + trimmed_len }); + + /* + * Deallocate the part of the current extent that + * overlaps the replacement extents. That starts + * at @file_block. For now, assume it goes + * through the end of the current extent. (If the + * current extent extends beyond the end of the + * replacement extents, we'll update the + * blockCount below.) + */ + dealloc_ext.startBlock += trimmed_len; + dealloc_ext.blockCount -= trimmed_len; + } + + // Insert the replacements + for (int i = 0; i < repl_count; ++i) + push_ext(extents, &count, &repl[i]); + } + + if (block + ext->blockCount > end_file_block) { + /* + * The head or middle of the current extent overlaps + * the replacement extents. Keep the non-overlapping + * tail of the current extent. + */ + uint32_t overlap = end_file_block - block; + + // Push (keep) non-overlapping tail of current extent + push_ext(extents, &count, + &(HFSPlusExtentDescriptor){ ext->startBlock + overlap, + ext->blockCount - overlap }); + + /* + * Deallocate the part of current extent that overlaps + * the replacements. + */ + dealloc_ext.blockCount = (ext->startBlock + overlap + - dealloc_ext.startBlock); + } + + CHECK(BlockDeallocate(hfsmp, dealloc_ext.startBlock, + dealloc_ext.blockCount, 0), ret, exit); + } + + // Move to next (existing) extent from iterator + block += ext->blockCount; + + if (++iter_in->ndx >= kHFSPlusExtentDensity) { + if (block >= end_file_block) { + if (iter_in->last_in_fork || !(count % kHFSPlusExtentDensity)) { + /* + * This is the easy case. We've hit the end or we have a + * multiple of 8, so we can just write out the extents we + * have and it should all fit within a transaction. + */ + + goto finish; + } + + if (count + kHFSPlusExtentDensity > buffered_extents + || (roll_back_count + + kHFSPlusExtentDensity > max_roll_back_extents)) { + /* + * We've run out of room for the next group, so drop out + * and take a different strategy. + */ + break; + } + } + + CHECK(hfs_ext_iter_next_group(iter_in), ret, exit); + } + } // for (;;) + + /* + * We're not at the end so we need to try and pad to a multiple of 8 + * so that we don't have to touch all the subsequent records. We pad + * by stealing single blocks. 
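+	 * (Ending on a multiple of kHFSPlusExtentDensity means the startBlock
+	 * keys of the untouched groups that follow remain correct, so those
+	 * records never have to be rewritten.)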
+ */ + + int stop_at = 0; + + for (;;) { + // @in points to the record we're stealing from + int in = count - 1; + + count = roundup(count, kHFSPlusExtentDensity); + + // @out is where we put the stolen single blocks + int out = count - 1; + + do { + if (out <= in) { + // We suceeded in padding; we're done + goto finish; + } + + /* + * "Steal" a block, or move a one-block extent within the + * @extents array. + * + * If the extent we're "stealing" from (@in) is only one + * block long, we'll end up copying it to @out, setting + * @in's blockCount to zero, and decrementing @in. So, we + * either split a multi-block extent; or move it within + * the @extents array. + */ + extents[out].blockCount = 1; + extents[out].startBlock = (extents[in].startBlock + + extents[in].blockCount - 1); + --out; + } while (--extents[in].blockCount || --in >= stop_at); + + // We ran out of extents + if (roll_back_count + kHFSPlusExtentDensity > max_roll_back_extents) { + ret = ENOSPC; + goto exit; + } + + // Need to shift extents starting at out + 1 + ++out; + memmove(&extents[stop_at], &extents[out], + (count - out) * sizeof(*extents)); + count -= out - stop_at; + + // Pull in the next group + CHECK(hfs_ext_iter_next_group(iter_in), ret, exit); + + // Take a copy of these extents for roll back purposes + hfs_ext_copy_rec(iter_in->group, &roll_back_extents[roll_back_count]); + roll_back_count += kHFSPlusExtentDensity; + + // Delete this group; we're going to replace it + CHECK(BTDeleteRecord(tree, &iter_in->bt_iter), ret, exit); + + if (iter_in->last_in_fork) { + // Great! We've hit the end. Coalesce and write out. + int old_count = count; + count = 0; + + /* + * First coalesce the extents we already have. Takes + * advantage of push_ext coalescing the input extent with + * the last extent in @extents. If the extents are not + * contiguous, then this just copies the extents over + * themselves and sets @count back to @old_count. + */ + for (int i = 0; i < old_count; ++i) + push_ext(extents, &count, &extents[i]); + + // Make room if necessary + const int flush_count = buffered_extents - kHFSPlusExtentDensity; + if (count > flush_count) { + CHECK(hfs_ext_iter_update(iter_out, extents, + flush_count, catalog_extents), ret, exit); + + memmove(&extents[0], &extents[flush_count], + (count - flush_count) * sizeof(*extents)); + + count -= flush_count; + } + + // Add in the extents we just read in + for (int i = 0; i < kHFSPlusExtentDensity; ++i) { + HFSPlusExtentDescriptor *ext = &iter_in->group[i]; + if (!ext->blockCount) + break; + push_ext(extents, &count, ext); + } + + goto finish; + } // if (iter_in->last_in_fork) + + /* + * Otherwise, we're not at the end, so we add these extents and then + * try and pad out again to a multiple of 8. We start by making room. 
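+		 * (Making room means flushing one full group through iter_out, just
+		 * as in the last_in_fork path above, before the next group is copied
+		 * into @extents.)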
+ */ + if (count > buffered_extents - kHFSPlusExtentDensity) { + // Only write out one group here + CHECK(hfs_ext_iter_update(iter_out, extents, + kHFSPlusExtentDensity, + catalog_extents), ret, exit); + + memmove(&extents[0], &extents[kHFSPlusExtentDensity], + (count - kHFSPlusExtentDensity) * sizeof(*extents)); + + count -= kHFSPlusExtentDensity; + } + + // Record where to stop when padding above + stop_at = count; + + // Copy in the new extents + hfs_ext_copy_rec(iter_in->group, &extents[count]); + count += kHFSPlusExtentDensity; + } // for (;;) + +finish: + + // Write the remaining extents + CHECK(hfs_ext_iter_update(iter_out, extents, count, + catalog_extents), ret, exit); + + CHECK(BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork), ret, exit); + CHECK(BTFlushPath(hfsmp->hfs_extents_cp->c_datafork), ret, exit); + +exit: + + if (ret && roll_back_count) { + +#define RB_FAILED \ + do { \ + printf("hfs_ext_replace:%u: roll back failed\n", __LINE__); \ + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); \ + goto roll_back_failed; \ + } while (0) + + // First delete any groups we inserted + HFSPlusExtentKey *key_out = hfs_ext_iter_key_mut(iter_out); + + key_in->startBlock = start_group_block; + if (!key_in->startBlock && key_out->startBlock > key_in->startBlock) { + key_in->startBlock += hfs_total_blocks(catalog_extents, + kHFSPlusExtentDensity); + } + + if (key_out->startBlock > key_in->startBlock) { + FSBufferDescriptor fbd = { + .bufferAddress = &iter_in->group, + .itemCount = 1, + .itemSize = sizeof(iter_in->group) + }; + + if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL, + &iter_in->bt_iter)) { + RB_FAILED; + } + + for (;;) { + if (BTDeleteRecord(tree, &iter_in->bt_iter)) + RB_FAILED; + + key_in->startBlock += hfs_total_blocks(iter_in->group, + kHFSPlusExtentDensity); + + if (key_in->startBlock >= key_out->startBlock) + break; + + if (BTSearchRecord(tree, &iter_in->bt_iter, &fbd, NULL, + &iter_in->bt_iter)) { + RB_FAILED; + } + } + } + + // Position iter_out + key_out->startBlock = start_group_block; + + // Roll back all the extents + if (hfs_ext_iter_update(iter_out, roll_back_extents, roll_back_count, + catalog_extents)) { + RB_FAILED; + } + + // And we need to reallocate the blocks we deallocated + const uint32_t end_block = min(block, end_file_block); + block = start_group_block; + for (int i = 0; i < roll_back_count && block < end_block; ++i) { + HFSPlusExtentDescriptor *ext = &roll_back_extents[i]; + + if (block + ext->blockCount <= file_block) + continue; + + HFSPlusExtentDescriptor alloc_ext = *ext; + + if (block <= file_block) { + uint32_t trimmed_len = file_block - block; + + alloc_ext.startBlock += trimmed_len; + alloc_ext.blockCount -= trimmed_len; + } + + if (block + ext->blockCount > end_file_block) { + uint32_t overlap = end_file_block - block; + + alloc_ext.blockCount = (ext->startBlock + overlap + - alloc_ext.startBlock); + } + + if (hfs_block_alloc(hfsmp, &alloc_ext, HFS_ALLOC_ROLL_BACK, NULL)) + RB_FAILED; + + block += ext->blockCount; + } + + if (BTFlushPath(hfsmp->hfs_catalog_cp->c_datafork) + || BTFlushPath(hfsmp->hfs_extents_cp->c_datafork)) { + RB_FAILED; + } + } // if (ret && roll_back_count) + +roll_back_failed: + + hfs_free(iter_in, sizeof(*iter_in) * 2); + hfs_free(extents, sizeof(*extents) * buffered_extents); + hfs_free(roll_back_extents, (max_roll_back_extents + * sizeof(HFSPlusExtentDescriptor))); + + return MacToVFSError(ret); +} diff --git a/core/hfs_extents.h b/core/hfs_extents.h new file mode 100644 index 0000000..fce2e49 --- /dev/null +++ 
b/core/hfs_extents.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2014-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef HFS_EXTENTS_H_ +#define HFS_EXTENTS_H_ + +#include +#include + +#include "hfs_format.h" + +#if !HFS_EXTENTS_TEST && !HFS_ALLOC_TEST +#include "hfs_cnode.h" +#include "hfs.h" +#include "BTreesInternal.h" +#endif + +typedef struct hfs_ext_iter { + struct vnode *vp; // If NULL, this is an xattr extent + BTreeIterator bt_iter; + uint8_t ndx; // Index in group + bool last_in_fork; + uint32_t file_block; + uint32_t group_block_count; + HFSPlusExtentRecord group; +} hfs_ext_iter_t; + +errno_t hfs_ext_find(vnode_t vp, off_t offset, hfs_ext_iter_t *iter); + +errno_t hfs_ext_replace(hfsmount_t *hfsmp, vnode_t vp, + uint32_t file_block, + const HFSPlusExtentDescriptor *repl, + int count, + HFSPlusExtentRecord catalog_extents); + +bool hfs_ext_iter_is_catalog_extents(hfs_ext_iter_t *iter); + +static inline void hfs_ext_copy_rec(const HFSPlusExtentRecord src, + HFSPlusExtentRecord dst) +{ + memcpy(dst, src, sizeof(HFSPlusExtentRecord)); +} + +static inline uint32_t hfs_ext_end(const HFSPlusExtentDescriptor *ext) +{ + return ext->startBlock + ext->blockCount; +} + +#endif // HFS_EXTENTS_H_ diff --git a/core/hfs_format.h b/core/hfs_format.h new file mode 100644 index 0000000..89df0dc --- /dev/null +++ b/core/hfs_format.h @@ -0,0 +1,818 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef __HFS_FORMAT__
+#define __HFS_FORMAT__
+
+#include
+#include
+#include "hfs_unistr.h"
+
+/*
+ * hfs_format.h
+ *
+ * This file describes the on-disk format for HFS and HFS Plus volumes.
+ *
+ * Note: Starting with 10.9, the definition of struct HFSUniStr255 exists in hfs_unistr.h
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* some on-disk hfs structures have 68K alignment (misaligned) */
+
+/* Signatures used to differentiate between HFS and HFS Plus volumes */
+enum {
+ kHFSSigWord = 0x4244, /* 'BD' in ASCII */
+ kHFSPlusSigWord = 0x482B, /* 'H+' in ASCII */
+ kHFSXSigWord = 0x4858, /* 'HX' in ASCII */
+
+ kHFSPlusVersion = 0x0004, /* 'H+' volumes are version 4 only */
+ kHFSXVersion = 0x0005, /* 'HX' volumes start with version 5 */
+
+ kHFSPlusMountVersion = 0x31302E30, /* '10.0' for Mac OS X */
+ kHFSJMountVersion = 0x4846534a, /* 'HFSJ' for journaled HFS+ on OS X */
+ kFSKMountVersion = 0x46534b21 /* 'FSK!' for failed journal replay */
+};
+
+
+#ifdef __APPLE_API_PRIVATE
+/*
+ * Mac OS X has two special directories on HFS+ volumes for hardlinked files
+ * and hardlinked directories as well as for open-unlinked files.
+ *
+ * These directories and their contents are not exported from the filesystem
+ * under Mac OS X.
+ */
+#define HFSPLUSMETADATAFOLDER "\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80HFS+ Private Data"
+#define HFSPLUS_DIR_METADATA_FOLDER ".HFS+ Private Directory Data\xd"
+
+/*
+ * Files in the "HFS+ Private Data" folder have one of the following prefixes
+ * followed by a decimal number (no leading zeros) for the file ID.
+ *
+ * Note: Earlier versions of Mac OS X used a 32 bit random number for the link
+ * ref number instead of the file id.
+ *
+ * e.g. iNode7182000 and temp3296
+ */
+#define HFS_INODE_PREFIX "iNode"
+#define HFS_DELETE_PREFIX "temp"
+
+/*
+ * Files in the ".HFS+ Private Directory Data" folder have the following
+ * prefix followed by a decimal number (no leading zeros) for the file ID.
+ *
+ * e.g. dir_555
+ */
+#define HFS_DIRINODE_PREFIX "dir_"
+
+/*
+ * Hardlink inodes save the head of the link chain in
+ * an extended attribute named FIRST_LINK_XATTR_NAME.
+ * The attribute data is the decimal value in ASCII
+ * of the cnid for the first link in the chain.
+ *
+ * This extended attribute is private (i.e. it's not
+ * exported in the getxattr/listxattr POSIX APIs).
+ */
+#define FIRST_LINK_XATTR_NAME "com.apple.system.hfs.firstlink"
+#define FIRST_LINK_XATTR_REC_SIZE (sizeof(HFSPlusAttrData) - 2 + 12)
+
+/*
+ * The name space ID for generating an HFS volume UUID
+ *
+ * B3E20F39-F292-11D6-97A4-00306543ECAC
+ */
+#define HFS_UUID_NAMESPACE_ID "\xB3\xE2\x0F\x39\xF2\x92\x11\xD6\x97\xA4\x00\x30\x65\x43\xEC\xAC"
+
+#endif /* __APPLE_API_PRIVATE */
+
+/*
+ * Indirect link files (hard links) have the following type/creator.
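+ * These values are stored in the fdType and fdCreator fields of the link
+ * file's Finder information, which is what identifies a catalog file
+ * record as a hard link.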
+ */ +enum { + kHardLinkFileType = 0x686C6E6B, /* 'hlnk' */ + kHFSPlusCreator = 0x6866732B /* 'hfs+' */ +}; + + +/* + * File type and creator for symbolic links + */ +enum { + kSymLinkFileType = 0x736C6E6B, /* 'slnk' */ + kSymLinkCreator = 0x72686170 /* 'rhap' */ +}; + + +enum { + kHFSMaxVolumeNameChars = 27, + kHFSMaxFileNameChars = 31, + kHFSPlusMaxFileNameChars = 255 +}; + + +/* Extent overflow file data structures */ + +/* HFS Extent key */ +struct HFSExtentKey { + u_int8_t keyLength; /* length of key, excluding this field */ + u_int8_t forkType; /* 0 = data fork, FF = resource fork */ + u_int32_t fileID; /* file ID */ + u_int16_t startBlock; /* first file allocation block number in this extent */ +} __attribute__((aligned(2), packed)); +typedef struct HFSExtentKey HFSExtentKey; + +/* HFS Plus Extent key */ +struct HFSPlusExtentKey { + u_int16_t keyLength; /* length of key, excluding this field */ + u_int8_t forkType; /* 0 = data fork, FF = resource fork */ + u_int8_t pad; /* make the other fields align on 32-bit boundary */ + u_int32_t fileID; /* file ID */ + u_int32_t startBlock; /* first file allocation block number in this extent */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusExtentKey HFSPlusExtentKey; + +/* Number of extent descriptors per extent record */ +enum { + kHFSExtentDensity = 3, + kHFSPlusExtentDensity = 8 +}; + +/* HFS extent descriptor */ +struct HFSExtentDescriptor { + u_int16_t startBlock; /* first allocation block */ + u_int16_t blockCount; /* number of allocation blocks */ +} __attribute__((aligned(2), packed)); +typedef struct HFSExtentDescriptor HFSExtentDescriptor; + +/* HFS Plus extent descriptor */ +struct HFSPlusExtentDescriptor { + u_int32_t startBlock; /* first allocation block */ + u_int32_t blockCount; /* number of allocation blocks */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusExtentDescriptor HFSPlusExtentDescriptor; + +/* HFS extent record */ +typedef HFSExtentDescriptor HFSExtentRecord[3]; + +/* HFS Plus extent record */ +typedef HFSPlusExtentDescriptor HFSPlusExtentRecord[8]; + + +/* Finder information */ +struct FndrFileInfo { + u_int32_t fdType; /* file type */ + u_int32_t fdCreator; /* file creator */ + u_int16_t fdFlags; /* Finder flags */ + struct { + int16_t v; /* file's location */ + int16_t h; + } fdLocation; + int16_t opaque; +} __attribute__((aligned(2), packed)); +typedef struct FndrFileInfo FndrFileInfo; + +struct FndrDirInfo { + struct { /* folder's window rectangle */ + int16_t top; + int16_t left; + int16_t bottom; + int16_t right; + } frRect; + unsigned short frFlags; /* Finder flags */ + struct { + u_int16_t v; /* folder's location */ + u_int16_t h; + } frLocation; + int16_t opaque; +} __attribute__((aligned(2), packed)); +typedef struct FndrDirInfo FndrDirInfo; + +struct FndrOpaqueInfo { + int8_t opaque[16]; +} __attribute__((aligned(2), packed)); +typedef struct FndrOpaqueInfo FndrOpaqueInfo; + +struct FndrExtendedDirInfo { + u_int32_t document_id; + u_int32_t date_added; + u_int16_t extended_flags; + u_int16_t reserved3; + u_int32_t write_gen_counter; +} __attribute__((aligned(2), packed)); + +struct FndrExtendedFileInfo { + u_int32_t document_id; + u_int32_t date_added; + u_int16_t extended_flags; + u_int16_t reserved2; + u_int32_t write_gen_counter; +} __attribute__((aligned(2), packed)); + +/* HFS Plus Fork data info - 80 bytes */ +struct HFSPlusForkData { + u_int64_t logicalSize; /* fork's logical size in bytes */ + u_int32_t clumpSize; /* fork's clump size in bytes */ + u_int32_t 
totalBlocks; /* total blocks used by this fork */ + HFSPlusExtentRecord extents; /* initial set of extents */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusForkData HFSPlusForkData; + + +/* Mac OS X has 16 bytes worth of "BSD" info. + * + * Note: Mac OS 9 implementations and applications + * should preserve, but not change, this information. + */ +struct HFSPlusBSDInfo { + u_int32_t ownerID; /* user-id of owner or hard link chain previous link */ + u_int32_t groupID; /* group-id of owner or hard link chain next link */ + u_int8_t adminFlags; /* super-user changeable flags */ + u_int8_t ownerFlags; /* owner changeable flags */ + u_int16_t fileMode; /* file type and permission bits */ + union { + u_int32_t iNodeNum; /* indirect node number (hard links only) */ + u_int32_t linkCount; /* links that refer to this indirect node */ + u_int32_t rawDevice; /* special file device (FBLK and FCHR only) */ + } special; +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusBSDInfo HFSPlusBSDInfo; + +/* + * Hardlink "links" resolve to an inode + * and the actual uid/gid comes from that + * inode. + * + * We repurpose the links's uid/gid fields + * for the hardlink link chain. The chain + * consists of a doubly linked list of file + * ids. + */ + +#define hl_firstLinkID reserved1 /* Valid only if HasLinkChain flag is set (indirect nodes only) */ + +#define hl_prevLinkID bsdInfo.ownerID /* Valid only if HasLinkChain flag is set */ +#define hl_nextLinkID bsdInfo.groupID /* Valid only if HasLinkChain flag is set */ + +#define hl_linkReference bsdInfo.special.iNodeNum +#define hl_linkCount bsdInfo.special.linkCount + + +/* Catalog file data structures */ + +enum { + kHFSRootParentID = 1, /* Parent ID of the root folder */ + kHFSRootFolderID = 2, /* Folder ID of the root folder */ + kHFSExtentsFileID = 3, /* File ID of the extents file */ + kHFSCatalogFileID = 4, /* File ID of the catalog file */ + kHFSBadBlockFileID = 5, /* File ID of the bad allocation block file */ + kHFSAllocationFileID = 6, /* File ID of the allocation file (HFS Plus only) */ + kHFSStartupFileID = 7, /* File ID of the startup file (HFS Plus only) */ + kHFSAttributesFileID = 8, /* File ID of the attribute file (HFS Plus only) */ + kHFSAttributeDataFileID = 13, /* Used in Mac OS X runtime for extent based attributes */ + /* kHFSAttributeDataFileID is never stored on disk. 
*/ + kHFSRepairCatalogFileID = 14, /* Used when rebuilding Catalog B-tree */ + kHFSBogusExtentFileID = 15, /* Used for exchanging extents in extents file */ + kHFSFirstUserCatalogNodeID = 16 +}; + +/* HFS catalog key */ +struct HFSCatalogKey { + u_int8_t keyLength; /* key length (in bytes) */ + u_int8_t reserved; /* reserved (set to zero) */ + u_int32_t parentID; /* parent folder ID */ + u_int8_t nodeName[kHFSMaxFileNameChars + 1]; /* catalog node name */ +} __attribute__((aligned(2), packed)); +typedef struct HFSCatalogKey HFSCatalogKey; + +/* HFS Plus catalog key */ +struct HFSPlusCatalogKey { + u_int16_t keyLength; /* key length (in bytes) */ + u_int32_t parentID; /* parent folder ID */ + HFSUniStr255 nodeName; /* catalog node name */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusCatalogKey HFSPlusCatalogKey; + +/* Catalog record types */ +enum { + /* HFS Catalog Records */ + kHFSFolderRecord = 0x0100, /* Folder record */ + kHFSFileRecord = 0x0200, /* File record */ + kHFSFolderThreadRecord = 0x0300, /* Folder thread record */ + kHFSFileThreadRecord = 0x0400, /* File thread record */ + + /* HFS Plus Catalog Records */ + kHFSPlusFolderRecord = 1, /* Folder record */ + kHFSPlusFileRecord = 2, /* File record */ + kHFSPlusFolderThreadRecord = 3, /* Folder thread record */ + kHFSPlusFileThreadRecord = 4 /* File thread record */ +}; + + +/* Catalog file record flags */ +enum { + kHFSFileLockedBit = 0x0000, /* file is locked and cannot be written to */ + kHFSFileLockedMask = 0x0001, + + kHFSThreadExistsBit = 0x0001, /* a file thread record exists for this file */ + kHFSThreadExistsMask = 0x0002, + + kHFSHasAttributesBit = 0x0002, /* object has extended attributes */ + kHFSHasAttributesMask = 0x0004, + + kHFSHasSecurityBit = 0x0003, /* object has security data (ACLs) */ + kHFSHasSecurityMask = 0x0008, + + kHFSHasFolderCountBit = 0x0004, /* only for HFSX, folder maintains a separate sub-folder count */ + kHFSHasFolderCountMask = 0x0010, /* (sum of folder records and directory hard links) */ + + kHFSHasLinkChainBit = 0x0005, /* has hardlink chain (inode or link) */ + kHFSHasLinkChainMask = 0x0020, + + kHFSHasChildLinkBit = 0x0006, /* folder has a child that's a dir link */ + kHFSHasChildLinkMask = 0x0040, + + kHFSHasDateAddedBit = 0x0007, /* File/Folder has the date-added stored in the finder info. 
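+                                                           (i.e. in the date_added field of FndrExtendedFileInfo / FndrExtendedDirInfo)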
*/
+ kHFSHasDateAddedMask = 0x0080,
+
+ kHFSFastDevPinnedBit = 0x0008, /* this file has been pinned to the fast-device by the hot-file code on cooperative fusion */
+ kHFSFastDevPinnedMask = 0x0100,
+
+ kHFSDoNotFastDevPinBit = 0x0009, /* this file cannot be pinned to the fast-device */
+ kHFSDoNotFastDevPinMask = 0x0200,
+
+ kHFSFastDevCandidateBit = 0x000a, /* this item is a potential candidate for fast-dev pinning (as are any of its descendants) */
+ kHFSFastDevCandidateMask = 0x0400,
+
+ kHFSAutoCandidateBit = 0x000b, /* this item was automatically marked as a fast-dev candidate by the kernel */
+ kHFSAutoCandidateMask = 0x0800
+
+ // There are only 4 flag bits remaining: 0x1000, 0x2000, 0x4000, 0x8000
+
+};
+
+
+/* HFS catalog folder record - 70 bytes */
+struct HFSCatalogFolder {
+ int16_t recordType; /* == kHFSFolderRecord */
+ u_int16_t flags; /* folder flags */
+ u_int16_t valence; /* folder valence */
+ u_int32_t folderID; /* folder ID */
+ u_int32_t createDate; /* date and time of creation */
+ u_int32_t modifyDate; /* date and time of last modification */
+ u_int32_t backupDate; /* date and time of last backup */
+ FndrDirInfo userInfo; /* Finder information */
+ FndrOpaqueInfo finderInfo; /* additional Finder information */
+ u_int32_t reserved[4]; /* reserved - initialized as zero */
+} __attribute__((aligned(2), packed));
+typedef struct HFSCatalogFolder HFSCatalogFolder;
+
+/* HFS Plus catalog folder record - 88 bytes */
+struct HFSPlusCatalogFolder {
+ int16_t recordType; /* == kHFSPlusFolderRecord */
+ u_int16_t flags; /* file flags */
+ u_int32_t valence; /* folder's item count */
+ u_int32_t folderID; /* folder ID */
+ u_int32_t createDate; /* date and time of creation */
+ u_int32_t contentModDate; /* date and time of last content modification */
+ u_int32_t attributeModDate; /* date and time of last attribute modification */
+ u_int32_t accessDate; /* date and time of last access (MacOS X only) */
+ u_int32_t backupDate; /* date and time of last backup */
+ HFSPlusBSDInfo bsdInfo; /* permissions (for MacOS X) */
+ FndrDirInfo userInfo; /* Finder information */
+ FndrOpaqueInfo finderInfo; /* additional Finder information */
+ u_int32_t textEncoding; /* hint for name conversions */
+ u_int32_t folderCount; /* number of enclosed folders, active when HasFolderCount is set */
+} __attribute__((aligned(2), packed));
+typedef struct HFSPlusCatalogFolder HFSPlusCatalogFolder;
+
+/* HFS catalog file record - 102 bytes */
+struct HFSCatalogFile {
+ int16_t recordType; /* == kHFSFileRecord */
+ u_int8_t flags; /* file flags */
+ int8_t fileType; /* file type (unused ?)
*/ + FndrFileInfo userInfo; /* Finder information */ + u_int32_t fileID; /* file ID */ + u_int16_t dataStartBlock; /* not used - set to zero */ + int32_t dataLogicalSize; /* logical EOF of data fork */ + int32_t dataPhysicalSize; /* physical EOF of data fork */ + u_int16_t rsrcStartBlock; /* not used - set to zero */ + int32_t rsrcLogicalSize; /* logical EOF of resource fork */ + int32_t rsrcPhysicalSize; /* physical EOF of resource fork */ + u_int32_t createDate; /* date and time of creation */ + u_int32_t modifyDate; /* date and time of last modification */ + u_int32_t backupDate; /* date and time of last backup */ + FndrOpaqueInfo finderInfo; /* additional Finder information */ + u_int16_t clumpSize; /* file clump size (not used) */ + HFSExtentRecord dataExtents; /* first data fork extent record */ + HFSExtentRecord rsrcExtents; /* first resource fork extent record */ + u_int32_t reserved; /* reserved - initialized as zero */ +} __attribute__((aligned(2), packed)); +typedef struct HFSCatalogFile HFSCatalogFile; + +/* HFS Plus catalog file record - 248 bytes */ +struct HFSPlusCatalogFile { + int16_t recordType; /* == kHFSPlusFileRecord */ + u_int16_t flags; /* file flags */ + u_int32_t reserved1; /* reserved - initialized as zero */ + u_int32_t fileID; /* file ID */ + u_int32_t createDate; /* date and time of creation */ + u_int32_t contentModDate; /* date and time of last content modification */ + u_int32_t attributeModDate; /* date and time of last attribute modification */ + u_int32_t accessDate; /* date and time of last access (MacOS X only) */ + u_int32_t backupDate; /* date and time of last backup */ + HFSPlusBSDInfo bsdInfo; /* permissions (for MacOS X) */ + FndrFileInfo userInfo; /* Finder information */ + FndrOpaqueInfo finderInfo; /* additional Finder information */ + u_int32_t textEncoding; /* hint for name conversions */ + u_int32_t reserved2; /* reserved - initialized as zero */ + + /* Note: these start on double long (64 bit) boundary */ + HFSPlusForkData dataFork; /* size and block data for data fork */ + HFSPlusForkData resourceFork; /* size and block data for resource fork */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusCatalogFile HFSPlusCatalogFile; + +/* HFS catalog thread record - 46 bytes */ +struct HFSCatalogThread { + int16_t recordType; /* == kHFSFolderThreadRecord or kHFSFileThreadRecord */ + int32_t reserved[2]; /* reserved - initialized as zero */ + u_int32_t parentID; /* parent ID for this catalog node */ + u_int8_t nodeName[kHFSMaxFileNameChars + 1]; /* name of this catalog node */ +} __attribute__((aligned(2), packed)); +typedef struct HFSCatalogThread HFSCatalogThread; + +/* HFS Plus catalog thread record -- 264 bytes */ +struct HFSPlusCatalogThread { + int16_t recordType; /* == kHFSPlusFolderThreadRecord or kHFSPlusFileThreadRecord */ + int16_t reserved; /* reserved - initialized as zero */ + u_int32_t parentID; /* parent ID for this catalog node */ + HFSUniStr255 nodeName; /* name of this catalog node (variable length) */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusCatalogThread HFSPlusCatalogThread; + +#ifdef __APPLE_API_UNSTABLE +/* + * These are the types of records in the attribute B-tree. The values were + * chosen so that they wouldn't conflict with the catalog record types. 
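+ * (The catalog record types above use 1 through 4 for HFS Plus and
+ * 0x0100 through 0x0400 for HFS, so starting the attribute record types
+ * at 0x10 keeps the two namespaces disjoint.)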
+ */ +enum { + kHFSPlusAttrInlineData = 0x10, /* attributes whose data fits in a b-tree node */ + kHFSPlusAttrForkData = 0x20, /* extent based attributes (data lives in extents) */ + kHFSPlusAttrExtents = 0x30 /* overflow extents for large attributes */ +}; + + +/* + * HFSPlusAttrForkData + * For larger attributes, whose value is stored in allocation blocks. + * If the attribute has more than 8 extents, there will be additional + * records (of type HFSPlusAttrExtents) for this attribute. + */ +struct HFSPlusAttrForkData { + u_int32_t recordType; /* == kHFSPlusAttrForkData*/ + u_int32_t reserved; + HFSPlusForkData theFork; /* size and first extents of value*/ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusAttrForkData HFSPlusAttrForkData; + +/* + * HFSPlusAttrExtents + * This record contains information about overflow extents for large, + * fragmented attributes. + */ +struct HFSPlusAttrExtents { + u_int32_t recordType; /* == kHFSPlusAttrExtents*/ + u_int32_t reserved; + HFSPlusExtentRecord extents; /* additional extents*/ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusAttrExtents HFSPlusAttrExtents; + +/* + * Atrributes B-tree Data Record + * + * For small attributes, whose entire value is stored + * within a single B-tree record. + */ +struct HFSPlusAttrData { + u_int32_t recordType; /* == kHFSPlusAttrInlineData */ + u_int32_t reserved[2]; + u_int32_t attrSize; /* size of attribute data in bytes */ + u_int8_t attrData[2]; /* variable length */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusAttrData HFSPlusAttrData; + + +/* HFSPlusAttrInlineData is obsolete use HFSPlusAttrData instead */ +struct HFSPlusAttrInlineData { + u_int32_t recordType; + u_int32_t reserved; + u_int32_t logicalSize; + u_int8_t userData[2]; +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusAttrInlineData HFSPlusAttrInlineData; + + +/* A generic Attribute Record */ +union HFSPlusAttrRecord { + u_int32_t recordType; + HFSPlusAttrInlineData inlineData; /* NOT USED */ + HFSPlusAttrData attrData; + HFSPlusAttrForkData forkData; + HFSPlusAttrExtents overflowExtents; +}; +typedef union HFSPlusAttrRecord HFSPlusAttrRecord; + +/* Attribute key */ +enum { kHFSMaxAttrNameLen = 127 }; +struct HFSPlusAttrKey { + u_int16_t keyLength; /* key length (in bytes) */ + u_int16_t pad; /* set to zero */ + u_int32_t fileID; /* file associated with attribute */ + u_int32_t startBlock; /* first allocation block number for extents */ + u_int16_t attrNameLen; /* number of unicode characters */ + u_int16_t attrName[kHFSMaxAttrNameLen]; /* attribute name (Unicode) */ +} __attribute__((aligned(2), packed)); +typedef struct HFSPlusAttrKey HFSPlusAttrKey; + +#define kHFSPlusAttrKeyMaximumLength (sizeof(HFSPlusAttrKey) - sizeof(u_int16_t)) +#define kHFSPlusAttrKeyMinimumLength (kHFSPlusAttrKeyMaximumLength - kHFSMaxAttrNameLen*sizeof(u_int16_t)) + +#endif /* __APPLE_API_UNSTABLE */ + + +/* Key and node lengths */ +enum { + kHFSPlusExtentKeyMaximumLength = sizeof(HFSPlusExtentKey) - sizeof(u_int16_t), + kHFSExtentKeyMaximumLength = sizeof(HFSExtentKey) - sizeof(u_int8_t), + kHFSPlusCatalogKeyMaximumLength = sizeof(HFSPlusCatalogKey) - sizeof(u_int16_t), + kHFSPlusCatalogKeyMinimumLength = kHFSPlusCatalogKeyMaximumLength - sizeof(HFSUniStr255) + sizeof(u_int16_t), + kHFSCatalogKeyMaximumLength = sizeof(HFSCatalogKey) - sizeof(u_int8_t), + kHFSCatalogKeyMinimumLength = kHFSCatalogKeyMaximumLength - (kHFSMaxFileNameChars + 1) + sizeof(u_int8_t), + kHFSPlusCatalogMinNodeSize = 4096, + 
kHFSPlusExtentMinNodeSize = 512, + kHFSPlusAttrMinNodeSize = 4096 +}; + +/* HFS and HFS Plus volume attribute bits */ +enum { + /* Bits 0-6 are reserved (always cleared by MountVol call) */ + kHFSVolumeHardwareLockBit = 7, /* volume is locked by hardware */ + kHFSVolumeUnmountedBit = 8, /* volume was successfully unmounted */ + kHFSVolumeSparedBlocksBit = 9, /* volume has bad blocks spared */ + kHFSVolumeNoCacheRequiredBit = 10, /* don't cache volume blocks (i.e. RAM or ROM disk) */ + kHFSBootVolumeInconsistentBit = 11, /* boot volume is inconsistent (System 7.6 and later) */ + kHFSCatalogNodeIDsReusedBit = 12, + kHFSVolumeJournaledBit = 13, /* this volume has a journal on it */ + kHFSVolumeInconsistentBit = 14, /* serious inconsistencies detected at runtime */ + kHFSVolumeSoftwareLockBit = 15, /* volume is locked by software */ + /* + * HFS only has 16 bits of attributes in the MDB, but HFS Plus has 32 bits. + * Therefore, bits 16-31 can only be used on HFS Plus. + */ + kHFSUnusedNodeFixBit = 31, /* Unused nodes in the Catalog B-tree have been zero-filled. See Radar #6947811. */ + kHFSContentProtectionBit = 30, /* Volume has per-file content protection */ + + /*** Keep these in sync with the bits above ! ****/ + kHFSVolumeHardwareLockMask = 0x00000080, + kHFSVolumeUnmountedMask = 0x00000100, + kHFSVolumeSparedBlocksMask = 0x00000200, + kHFSVolumeNoCacheRequiredMask = 0x00000400, + kHFSBootVolumeInconsistentMask = 0x00000800, + kHFSCatalogNodeIDsReusedMask = 0x00001000, + kHFSVolumeJournaledMask = 0x00002000, + kHFSVolumeInconsistentMask = 0x00004000, + kHFSVolumeSoftwareLockMask = 0x00008000, + + /* Bits 16-31 are allocated from high to low */ + + kHFSContentProtectionMask = 0x40000000, + kHFSUnusedNodeFixMask = 0x80000000, + + kHFSMDBAttributesMask = 0x8380 +}; + +enum { + kHFSUnusedNodesFixDate = 0xc5ef2480 /* March 25, 2009 */ +}; + +/* HFS Master Directory Block - 162 bytes */ +/* Stored at sector #2 (3rd sector) and second-to-last sector. 
*/ +struct HFSMasterDirectoryBlock { + u_int16_t drSigWord; /* == kHFSSigWord */ + u_int32_t drCrDate; /* date and time of volume creation */ + u_int32_t drLsMod; /* date and time of last modification */ + u_int16_t drAtrb; /* volume attributes */ + u_int16_t drNmFls; /* number of files in root folder */ + u_int16_t drVBMSt; /* first block of volume bitmap */ + u_int16_t drAllocPtr; /* start of next allocation search */ + u_int16_t drNmAlBlks; /* number of allocation blocks in volume */ + u_int32_t drAlBlkSiz; /* size (in bytes) of allocation blocks */ + u_int32_t drClpSiz; /* default clump size */ + u_int16_t drAlBlSt; /* first allocation block in volume */ + u_int32_t drNxtCNID; /* next unused catalog node ID */ + u_int16_t drFreeBks; /* number of unused allocation blocks */ + u_int8_t drVN[kHFSMaxVolumeNameChars + 1]; /* volume name */ + u_int32_t drVolBkUp; /* date and time of last backup */ + u_int16_t drVSeqNum; /* volume backup sequence number */ + u_int32_t drWrCnt; /* volume write count */ + u_int32_t drXTClpSiz; /* clump size for extents overflow file */ + u_int32_t drCTClpSiz; /* clump size for catalog file */ + u_int16_t drNmRtDirs; /* number of directories in root folder */ + u_int32_t drFilCnt; /* number of files in volume */ + u_int32_t drDirCnt; /* number of directories in volume */ + u_int32_t drFndrInfo[8]; /* information used by the Finder */ + u_int16_t drEmbedSigWord; /* embedded volume signature (formerly drVCSize) */ + HFSExtentDescriptor drEmbedExtent; /* embedded volume location and size (formerly drVBMCSize and drCtlCSize) */ + u_int32_t drXTFlSize; /* size of extents overflow file */ + HFSExtentRecord drXTExtRec; /* extent record for extents overflow file */ + u_int32_t drCTFlSize; /* size of catalog file */ + HFSExtentRecord drCTExtRec; /* extent record for catalog file */ +} __attribute__((aligned(2), packed)); +typedef struct HFSMasterDirectoryBlock HFSMasterDirectoryBlock; + + +#ifdef __APPLE_API_UNSTABLE +#define SET_HFS_TEXT_ENCODING(hint) \ + (0x656e6300 | ((hint) & 0xff)) +#define GET_HFS_TEXT_ENCODING(hint) \ + (((hint) & 0xffffff00) == 0x656e6300 ? (hint) & 0x000000ff : 0xffffffffU) +#endif /* __APPLE_API_UNSTABLE */ + + +/* HFS Plus Volume Header - 512 bytes */ +/* Stored at sector #2 (3rd sector) and second-to-last sector. 
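+ In other words, the primary copy sits 1024 bytes from the start of the
+ volume and the alternate copy 1024 bytes before the end.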
*/
+struct HFSPlusVolumeHeader {
+ u_int16_t signature; /* == kHFSPlusSigWord */
+ u_int16_t version; /* == kHFSPlusVersion */
+ u_int32_t attributes; /* volume attributes */
+ u_int32_t lastMountedVersion; /* implementation version which last mounted volume */
+ u_int32_t journalInfoBlock; /* block addr of journal info (if volume is journaled, zero otherwise) */
+
+ u_int32_t createDate; /* date and time of volume creation */
+ u_int32_t modifyDate; /* date and time of last modification */
+ u_int32_t backupDate; /* date and time of last backup */
+ u_int32_t checkedDate; /* date and time of last disk check */
+
+ u_int32_t fileCount; /* number of files in volume */
+ u_int32_t folderCount; /* number of directories in volume */
+
+ u_int32_t blockSize; /* size (in bytes) of allocation blocks */
+ u_int32_t totalBlocks; /* number of allocation blocks in volume (includes this header and VBM) */
+ u_int32_t freeBlocks; /* number of unused allocation blocks */
+
+ u_int32_t nextAllocation; /* start of next allocation search */
+ u_int32_t rsrcClumpSize; /* default resource fork clump size */
+ u_int32_t dataClumpSize; /* default data fork clump size */
+ u_int32_t nextCatalogID; /* next unused catalog node ID */
+
+ u_int32_t writeCount; /* volume write count */
+ u_int64_t encodingsBitmap; /* which encodings have been used on this volume */
+
+ u_int8_t finderInfo[32]; /* information used by the Finder */
+
+ HFSPlusForkData allocationFile; /* allocation bitmap file */
+ HFSPlusForkData extentsFile; /* extents B-tree file */
+ HFSPlusForkData catalogFile; /* catalog B-tree file */
+ HFSPlusForkData attributesFile; /* extended attributes B-tree file */
+ HFSPlusForkData startupFile; /* boot file (secondary loader) */
+} __attribute__((aligned(2), packed));
+typedef struct HFSPlusVolumeHeader HFSPlusVolumeHeader;
+
+
+/* B-tree structures */
+
+enum BTreeKeyLimits{
+ kMaxKeyLength = 520
+};
+
+union BTreeKey{
+ u_int8_t length8;
+ u_int16_t length16;
+ u_int8_t rawData [kMaxKeyLength+2];
+};
+typedef union BTreeKey BTreeKey;
+
+/* BTNodeDescriptor -- Every B-tree node starts with these fields.
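+ The offsets of the records within a node are stored as u_int16_t values
+ packed at the end of the node, growing backward from its last two bytes.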
*/ +struct BTNodeDescriptor { + u_int32_t fLink; /* next node at this level*/ + u_int32_t bLink; /* previous node at this level*/ + int8_t kind; /* kind of node (leaf, index, header, map)*/ + u_int8_t height; /* zero for header, map; child is one more than parent*/ + u_int16_t numRecords; /* number of records in this node*/ + u_int16_t reserved; /* reserved - initialized as zero */ +} __attribute__((aligned(2), packed)); +typedef struct BTNodeDescriptor BTNodeDescriptor; + +/* Constants for BTNodeDescriptor kind */ +enum { + kBTLeafNode = -1, + kBTIndexNode = 0, + kBTHeaderNode = 1, + kBTMapNode = 2 +}; + +/* BTHeaderRec -- The first record of a B-tree header node */ +struct BTHeaderRec { + u_int16_t treeDepth; /* maximum height (usually leaf nodes) */ + u_int32_t rootNode; /* node number of root node */ + u_int32_t leafRecords; /* number of leaf records in all leaf nodes */ + u_int32_t firstLeafNode; /* node number of first leaf node */ + u_int32_t lastLeafNode; /* node number of last leaf node */ + u_int16_t nodeSize; /* size of a node, in bytes */ + u_int16_t maxKeyLength; /* reserved */ + u_int32_t totalNodes; /* total number of nodes in tree */ + u_int32_t freeNodes; /* number of unused (free) nodes in tree */ + u_int16_t reserved1; /* unused */ + u_int32_t clumpSize; /* reserved */ + u_int8_t btreeType; /* reserved */ + u_int8_t keyCompareType; /* Key string Comparison Type */ + u_int32_t attributes; /* persistent attributes about the tree */ + u_int32_t reserved3[16]; /* reserved */ +} __attribute__((aligned(2), packed)); +typedef struct BTHeaderRec BTHeaderRec; + +/* Constants for BTHeaderRec attributes */ +enum { + kBTBadCloseMask = 0x00000001, /* reserved */ + kBTBigKeysMask = 0x00000002, /* key length field is 16 bits */ + kBTVariableIndexKeysMask = 0x00000004 /* keys in index nodes are variable length */ +}; + + +/* Catalog Key Name Comparison Type */ +enum { + kHFSCaseFolding = 0xCF, /* case folding (case-insensitive) */ + kHFSBinaryCompare = 0xBC /* binary compare (case-sensitive) */ +}; + +#include + +/* JournalInfoBlock - Structure that describes where our journal lives */ + +// the original size of the reserved field in the JournalInfoBlock was +// 32*sizeof(u_int32_t). To keep the total size of the structure the +// same we subtract the size of new fields (currently: ext_jnl_uuid and +// machine_uuid). If you add additional fields, place them before the +// reserved field and subtract their size in this macro. +// +#define JIB_RESERVED_SIZE ((32*sizeof(u_int32_t)) - sizeof(uuid_string_t) - 48) + +struct JournalInfoBlock { + u_int32_t flags; + u_int32_t device_signature[8]; // signature used to locate our device. + u_int64_t offset; // byte offset to the journal on the device + u_int64_t size; // size in bytes of the journal + uuid_string_t ext_jnl_uuid; + char machine_serial_num[48]; + char reserved[JIB_RESERVED_SIZE]; +} __attribute__((aligned(2), packed)); +typedef struct JournalInfoBlock JournalInfoBlock; + +enum { + kJIJournalInFSMask = 0x00000001, + kJIJournalOnOtherDeviceMask = 0x00000002, + kJIJournalNeedInitMask = 0x00000004 +}; + +// +// This the content type uuid for "external journal" GPT +// partitions. Each instance of a partition also has a +// uuid that uniquely identifies that instance. 
+// +#define EXTJNL_CONTENT_TYPE_UUID "4A6F7572-6E61-11AA-AA11-00306543ECAC" + + +#ifdef __cplusplus +} +#endif + +#endif /* __HFS_FORMAT__ */ diff --git a/core/hfs_fsctl.h b/core/hfs_fsctl.h new file mode 100644 index 0000000..0b89b25 --- /dev/null +++ b/core/hfs_fsctl.h @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2004-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _HFS_FSCTL_H_ +#define _HFS_FSCTL_H_ + +#include + +#include +#include +#include +#include + +#ifdef __APPLE_API_UNSTABLE + +struct hfs_backingstoreinfo { + int signature; /* == 3419115 */ + int version; /* version of this struct (1) */ + int backingfd; /* disk image file (on backing fs) */ + int bandsize; /* sparse disk image band size */ +}; + + +typedef char pathname_t[MAXPATHLEN]; + +struct hfs_journal_info { + off_t jstart; + off_t jsize; +}; + + +// Will be deprecated and replaced by hfs_fsinfo +struct hfsinfo_metadata { + uint32_t total; + uint32_t extents; + uint32_t catalog; + uint32_t allocation; + uint32_t attribute; + uint32_t journal; + uint32_t reserved[4]; +}; + +/* + * Flags for hfs_fsinfo_data structure + */ +#define HFS_FSINFO_CLASS_A 0x0001 /* Information for class A files requested */ +#define HFS_FSINFO_CLASS_B 0x0002 /* Information for class B files requested */ +#define HFS_FSINFO_CLASS_C 0x0004 /* Information for class C files requested */ +#define HFS_FSINFO_CLASS_D 0x0008 /* Information for class D files requested */ + +/* + * Maximum number of buckets to represent range from 0 to 1TB (2^40) in + * increments of power of 2, and one catch-all bucket for anything that + * is greater than 1TB + */ +#define HFS_FSINFO_DATA_MAX_BUCKETS 42 + +/* + * Maximum number of buckets to represents percentage range from 0 to 100 + * in increments of 10. + */ +#define HFS_FSINFO_PERCENT_MAX_BUCKETS 10 + +/* + * Maximum number of buckets to represent number of file/directory name characters + * (range 1 to 255) in increments of 5. + */ +#define HFS_FSINFO_NAME_MAX_BUCKETS 51 + +/* + * Version number to ensure that the caller and the kernel have same understanding + * of the hfs_fsinfo_data structure. This version needs to be bumped whenever the + * number of buckets is changed. 
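+ *
+ * A caller fills the version (along with the request type) into the
+ * hfs_fsinfo_header_t at the start of the structure; from user space the
+ * request might look something like:
+ *
+ *	hfs_fsinfo info = { .header = { .request_type = HFS_FSINFO_DIR_VALENCE,
+ *	                                .version      = HFS_FSINFO_VERSION } };
+ *	fsctl(path, HFSIOC_GET_FSINFO, &info, 0);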
+ */
+#define HFS_FSINFO_VERSION 1
+
+/*
+ * hfs_fsinfo_data is a generic data structure to aggregate information like sizes
+ * or counts in buckets of powers of 2. Each bucket represents a range of values
+ * that is determined based on its index in the array. Specifically, buckets[i]
+ * represents values that are greater than or equal to 2^(i-1) and less than 2^i,
+ * except the last bucket, which represents the range greater than or equal to 2^(i-1).
+ *
+ * The current maximum number of buckets is 42, so we can represent the range from
+ * 0 up to 1TB in increments of powers of 2, and then a catch-all bucket for
+ * anything that is greater than or equal to 1TB.
+ *
+ * For example,
+ * bucket[0] -> greater than or equal to 0 and less than 1
+ * bucket[1] -> greater than or equal to 1 and less than 2
+ * bucket[10] -> greater than or equal to 2^(10-1) = 512 and less than 2^10 = 1024
+ * bucket[20] -> greater than or equal to 2^(20-1) = 512KB and less than 2^20 = 1MB
+ * bucket[41] -> greater than or equal to 2^(41-1) = 1TB
+ *
+ * Note that fsctls that populate this data structure can take a long time to
+ * execute as this operation can be I/O intensive (traversing btrees) and compute
+ * intensive.
+ *
+ * WARNING: Any changes to this structure should also update version number to
+ * ensure that the clients and kernel are reading/writing correctly.
+ */
+
+/*
+ * The header includes the user input fields.
+ */
+typedef struct hfs_fsinfo_header {
+ uint32_t request_type;
+ uint16_t version;
+ uint16_t flags;
+} hfs_fsinfo_header_t;
+
+struct hfs_fsinfo_data {
+ hfs_fsinfo_header_t header;
+ uint32_t bucket[HFS_FSINFO_DATA_MAX_BUCKETS];
+};
+
+/*
+ * Structure to represent information about metadata files
+ *
+ * WARNING: Any changes to this structure should also update version number to
+ * ensure that the clients and kernel are reading/writing correctly.
+ */
+struct hfs_fsinfo_metadata {
+ hfs_fsinfo_header_t header;
+ uint32_t extents;
+ uint32_t catalog;
+ uint32_t allocation;
+ uint32_t attribute;
+ uint32_t journal;
+};
+
+/*
+ * Structure to represent distribution of number of file name characters
+ * in increments of 5s. Each bucket represents a range of values that is
+ * determined based on its index in the array. So bucket[i] represents values
+ * that are greater than or equal to (i*5) and less than ((i+1)*5).
+ *
+ * Since this structure represents range of file name characters and the
+ * maximum number of unicode characters in HFS+ is 255, the maximum number
+ * of buckets will be 52 [0..51].
+ *
+ * For example,
+ * bucket[4] -> greater than or equal to 20 and less than 25 characters
+ * bucket[51] -> equal to 255 characters
+ *
+ * WARNING: Any changes to this structure should also update version number to
+ * ensure that the clients and kernel are reading/writing correctly.
+ */
+struct hfs_fsinfo_name {
+ hfs_fsinfo_header_t header;
+ uint32_t bucket[HFS_FSINFO_NAME_MAX_BUCKETS];
+};
+
+/*
+ * Structure to represent information about content protection classes
+ *
+ * WARNING: Any changes to this structure should also update version number to
+ * ensure that the clients and kernel are reading/writing correctly.
+ */ +struct hfs_fsinfo_cprotect { + hfs_fsinfo_header_t header; + uint32_t class_A; + uint32_t class_B; + uint32_t class_C; + uint32_t class_D; + uint32_t class_E; + uint32_t class_F; +}; + +/* + * Union of all the different values returned by HFSIOC_FSINFO fsctl + */ +union hfs_fsinfo { + hfs_fsinfo_header_t header; + struct hfs_fsinfo_data data; + struct hfs_fsinfo_metadata metadata; + struct hfs_fsinfo_name name; + struct hfs_fsinfo_cprotect cprotect; +}; +typedef union hfs_fsinfo hfs_fsinfo; + +/* + * Type of FSINFO requested, specified by the caller in request_type field + */ +enum { + /* Information about number of allocation blocks for each metadata file, returns struct hfs_fsinfo_metadata */ + HFS_FSINFO_METADATA_BLOCKS_INFO = 1, + + /* Information about number of extents for each metadata file, returns struct hfs_fsinfo_metadata */ + HFS_FSINFO_METADATA_EXTENTS = 2, + + /* Information about percentage of free nodes vs used nodes in metadata btrees, returns struct hfs_fsinfo_metadata */ + HFS_FSINFO_METADATA_PERCENTFREE = 3, + + /* Distribution of number of extents for data files (data fork, no rsrc fork, no xattr), returns struct hfs_fsinfo_data */ + HFS_FSINFO_FILE_EXTENT_COUNT = 4, + + /* Distribution of extent sizes for data files (data fork, no rsrc fork, no xattr), returns struct hfs_fsinfo_data */ + HFS_FSINFO_FILE_EXTENT_SIZE = 5, + + /* Distribution of file sizes for data files (data fork, no rsrc fork, no xattr), returns struct hfs_fsinfo_data */ + HFS_FSINFO_FILE_SIZE = 6, + + /* Distribution of valence for all directories, returns struct hfs_fsinfo_data */ + HFS_FSINFO_DIR_VALENCE = 7, + + /* Distribution of file/directory name size in unicode characters, returns struct hfs_fsinfo_name */ + HFS_FSINFO_NAME_SIZE = 8, + + /* Distribution of extended attribute sizes, returns hfs_fsinfo_data */ + HFS_FSINFO_XATTR_SIZE = 9, + + /* Distribution of free space for the entire file system, returns struct hfs_fsinfo_data */ + HFS_FSINFO_FREE_EXTENTS = 10, + + /* Information about number of files belonging to each class, returns hfs_fsinfo_cprotect */ + HFS_FSINFO_FILE_CPROTECT_COUNT = 11, + + /* + * Distribution of symbolic link sizes for data files (data fork, no rsrc fork, no xattr), + * returns struct hfs_fsinfo_data + */ + HFS_FSINFO_SYMLINK_SIZE = 12, +}; + + +/* HFS FS CONTROL COMMANDS */ + +#define HFSIOC_RESIZE_PROGRESS _IOR('h', 1, u_int32_t) + +#define HFSIOC_RESIZE_VOLUME _IOW('h', 2, u_int64_t) + +#define HFSIOC_CHANGE_NEXT_ALLOCATION _IOWR('h', 3, u_int32_t) +/* Magic value for next allocation to use with fcntl to set next allocation + * to zero and never update it again on new block allocation. + */ +#define HFS_NO_UPDATE_NEXT_ALLOCATION 0xffffFFFF + +#if defined(KERNEL) +#define HFSIOC_GET_VOL_CREATE_TIME_32 _IOR('h', 4, int32_t) +#define HFSIOC_GET_VOL_CREATE_TIME_64 _IOR('h', 4, int64_t) +#else +#define HFSIOC_GET_VOL_CREATE_TIME _IOR('h', 4, time_t) +#endif /* KERNEL */ + +#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) + +#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) + +// 'h', 9 used to be HFSIOC_BULKACCESS which is now deprecated + +/* Unsupported - Previously used to enable/disable ACLs */ +#define HFSIOC_UNSUPPORTED _IOW('h', 10, int32_t) + +#define HFSIOC_PREV_LINK _IOWR('h', 11, u_int32_t) + +#define HFSIOC_NEXT_LINK _IOWR('h', 12, u_int32_t) + +#define HFSIOC_GETPATH _IOWR('h', 13, pathname_t) +/* By default, the path returned by HFS_GETPATH is an absolute path, + * i.e. 
it also contains the mount point of the volume on which the + * fileID exists. If the following bit is set, the path returned is + * relative to the root of the volume. + */ +#define HFS_GETPATH_VOLUME_RELATIVE 0x1 + +/* Enable/disable extent-based extended attributes */ +#define HFSIOC_SET_XATTREXTENTS_STATE _IOW('h', 14, u_int32_t) + +#if defined(KERNEL) +#define HFSIOC_EXT_BULKACCESS32 _IOW('h', 15, struct user32_ext_access_t) +#define HFSIOC_EXT_BULKACCESS64 _IOW('h', 15, struct user64_ext_access_t) +#else +#define HFSIOC_EXT_BULKACCESS _IOW('h', 15, struct ext_access_t) +#endif /* KERNEL */ + +#define HFSIOC_MARK_BOOT_CORRUPT _IO('h', 16) + +#define HFSIOC_GET_JOURNAL_INFO _IOR('h', 17, struct hfs_journal_info) + +#define HFSIOC_SET_VERY_LOW_DISK _IOW('h', 20, u_int32_t) + +#define HFSIOC_SET_LOW_DISK _IOW('h', 21, u_int32_t) + +#define HFSIOC_SET_DESIRED_DISK _IOW('h', 22, u_int32_t) + +#define HFSIOC_SET_ALWAYS_ZEROFILL _IOW('h', 23, int32_t) + /* XXXJRT Keep until 31866920 is resolved. */ +#define HFS_SET_ALWAYS_ZEROFILL IOCBASECMD(HFSIOC_SET_ALWAYS_ZEROFILL) + +#define HFSIOC_VOLUME_STATUS _IOR('h', 24, u_int32_t) + +/* Disable metadata zone for given volume */ +#define HFSIOC_DISABLE_METAZONE _IO('h', 25) + +/* Change the next CNID value */ +#define HFSIOC_CHANGE_NEXTCNID _IOWR('h', 26, u_int32_t) + /* XXXJRT Keep until 31866920 is resolved. */ +#define HFS_CHANGE_NEXTCNID IOCBASECMD(HFSIOC_CHANGE_NEXTCNID) + +/* Get the low disk space values */ +#define HFSIOC_GET_VERY_LOW_DISK _IOR('h', 27, u_int32_t) + +#define HFSIOC_GET_LOW_DISK _IOR('h', 28, u_int32_t) + +#define HFSIOC_GET_DESIRED_DISK _IOR('h', 29, u_int32_t) + +/* 30 was HFSIOC_GET_WRITE_GEN_COUNTER and is now deprecated */ + +/* 31 was HFSIOC_GET_DOCUMENT_ID and is now deprecated */ + +/* revisiond only uses this when something transforms in a way the kernel can't track such as "foo.rtf" -> "foo.rtfd" */ +#define HFSIOC_TRANSFER_DOCUMENT_ID _IOW('h', 32, u_int32_t) + + +/* + * XXX: Will be deprecated and replaced by HFSIOC_GET_FSINFO + * + * Get information about number of file system allocation blocks used by metadata + * files on the volume, including individual btrees and journal file. The caller + * can determine the size of file system allocation block using value returned as + * f_bsize by statfs(2). + */ +#define HFSIOC_FSINFO_METADATA_BLOCKS _IOWR('h', 38, struct hfsinfo_metadata) + +/* Send TRIMs for all free blocks to the underlying device */ +#define HFSIOC_CS_FREESPACE_TRIM _IOWR('h', 39, u_int32_t) + + +/* Get file system information for the given volume */ +#define HFSIOC_GET_FSINFO _IOWR('h', 45, hfs_fsinfo) + +/* Re-pin hotfile data; argument controls what state gets repinned */ +#define HFSIOC_REPIN_HOTFILE_STATE _IOWR('h', 46, u_int32_t) + +#define HFS_REPIN_METADATA 0x0001 +#define HFS_REPIN_USERDATA 0x0002 + +/* Mark a directory or file as worth caching on any underlying "fast" device */ +#define HFSIOC_SET_HOTFILE_STATE _IOWR('h', 47, u_int32_t) + +/* flags to pass to SET_HOTFILE_STATE */ +#define HFS_MARK_FASTDEVCANDIDATE 0x0001 +#define HFS_UNMARK_FASTDEVCANDIDATE 0x0002 +#define HFS_NEVER_FASTDEVCANDIDATE 0x0004 + +#define HFSIOC_SET_MAX_DEFRAG_SIZE _IOWR('h', 48, u_int32_t) + +#define HFSIOC_FORCE_ENABLE_DEFRAG _IOWR('h', 49, u_int32_t) + +/* These fsctls are ported from apfs. 
*/ +#ifndef APFSIOC_SET_NEAR_LOW_DISK +#define APFSIOC_SET_NEAR_LOW_DISK _IOW('J', 17, u_int32_t) +#endif /* APFSIOC_SET_NEAR_LOW_DISK */ + +#ifndef APFSIOC_GET_NEAR_LOW_DISK +#define APFSIOC_GET_NEAR_LOW_DISK _IOR('J', 18, u_int32_t) +#endif /* APFSIOC_GET_NEAR_LOW_DISK */ + +#endif /* __APPLE_API_UNSTABLE */ + +#endif /* ! _HFS_FSCTL_H_ */ diff --git a/core/hfs_fsinfo.c b/core/hfs_fsinfo.c new file mode 100644 index 0000000..ef8f362 --- /dev/null +++ b/core/hfs_fsinfo.c @@ -0,0 +1,889 @@ +/* + * Copyright (c) 2014-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_fsctl.h" +#include "hfs_endian.h" +#include "BTreesInternal.h" +#include "BTreesPrivate.h" +#include "FileMgrInternal.h" + +#include "hfs_cprotect.h" + + +union HFSPlusRecord { + HFSPlusCatalogFolder folder_record; + HFSPlusCatalogFile file_record; + HFSPlusCatalogThread thread_record; + HFSPlusExtentRecord extent_record; + HFSPlusAttrRecord attr_record; +}; +typedef union HFSPlusRecord HFSPlusRecord; + +union HFSPlusKey { + HFSPlusExtentKey extent_key; + HFSPlusAttrKey attr_key; +}; +typedef union HFSPlusKey HFSPlusKey; + +typedef enum traverse_btree_flag { + + //If set, extents btree will also be traversed along with catalog btree, so grab correct locks upfront + TRAVERSE_BTREE_EXTENTS = 1, + + // Getting content-protection attributes, allocate enough space to accomodate the records. 
+ TRAVERSE_BTREE_XATTR_CPROTECT = 2, + +} traverse_btree_flag_t; + + + +static errno_t hfs_fsinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo); +static errno_t hfs_fsinfo_metadata_extents(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo); +static errno_t hfs_fsinfo_metadata_percentfree(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo); +static errno_t fsinfo_file_extent_count_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_file_extent_size_catalog_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_file_extent_size_overflow_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_file_size_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_dir_valence_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_name_size_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t fsinfo_xattr_size_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +static errno_t traverse_btree(struct hfsmount *hfsmp, uint32_t btree_fileID, int flags, void *fsinfo, + int (*callback)(struct hfsmount *, HFSPlusKey *, HFSPlusRecord *, void *)); +static errno_t hfs_fsinfo_free_extents(struct hfsmount *hfsmp, struct hfs_fsinfo_data *fsinfo); +static void fsinfo_free_extents_callback(void *data, off_t free_extent_size); +#if CONFIG_PROTECT +static errno_t fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); +#endif +static errno_t fsinfo_symlink_size_callback(struct hfsmount *hfsmp, HFSPlusKey *key, HFSPlusRecord *record, void *data); + +/* + * Entry function for all the fsinfo requests from hfs_vnop_ioctl() + * Depending on the type of request, this function will call the + * appropriate sub-function and return success or failure back to + * the caller. + */ +errno_t hfs_get_fsinfo(struct hfsmount *hfsmp, void *a_data) +{ + int error = 0; + hfs_fsinfo *fsinfo_union; + uint32_t request_type; + uint32_t header_len = sizeof(hfs_fsinfo_header_t); + + fsinfo_union = (hfs_fsinfo *)a_data; + request_type = fsinfo_union->header.request_type; + + // Zero out output fields to fsinfo_union, keep the user input fields intact. 
+ bzero((char *)fsinfo_union + header_len, sizeof(hfs_fsinfo) - header_len); + + switch (request_type) { + case HFS_FSINFO_METADATA_BLOCKS_INFO: + error = hfs_fsinfo_metadata_blocks(hfsmp, &(fsinfo_union->metadata)); + break; + + case HFS_FSINFO_METADATA_EXTENTS: + error = hfs_fsinfo_metadata_extents(hfsmp, &(fsinfo_union->metadata)); + break; + + case HFS_FSINFO_METADATA_PERCENTFREE: + error = hfs_fsinfo_metadata_percentfree(hfsmp, &(fsinfo_union->metadata)); + break; + + case HFS_FSINFO_FILE_EXTENT_COUNT: + /* Traverse catalog btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, TRAVERSE_BTREE_EXTENTS, &(fsinfo_union->data), fsinfo_file_extent_count_callback); + break; + + case HFS_FSINFO_FILE_EXTENT_SIZE: + /* Traverse the catalog btree first */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->data), &fsinfo_file_extent_size_catalog_callback); + if (error) { + break; + } + /* Traverse the overflow extents btree now */ + error = traverse_btree(hfsmp, kHFSExtentsFileID, 0, &(fsinfo_union->data), &fsinfo_file_extent_size_overflow_callback); + break; + + case HFS_FSINFO_FILE_SIZE: + /* Traverse catalog btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->data), &fsinfo_file_size_callback); + break; + + case HFS_FSINFO_DIR_VALENCE: + /* Traverse catalog btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->data), &fsinfo_dir_valence_callback); + break; + + case HFS_FSINFO_NAME_SIZE: + /* Traverse catalog btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->name), &fsinfo_name_size_callback); + break; + + case HFS_FSINFO_XATTR_SIZE: + /* Traverse attribute btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSAttributesFileID, 0, &(fsinfo_union->data), &fsinfo_xattr_size_callback); + break; + + case HFS_FSINFO_FREE_EXTENTS: + error = hfs_fsinfo_free_extents(hfsmp, &(fsinfo_union->data)); + break; + + case HFS_FSINFO_SYMLINK_SIZE: + /* Traverse catalog btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->data), &fsinfo_symlink_size_callback); + break; + +#if CONFIG_PROTECT + case HFS_FSINFO_FILE_CPROTECT_COUNT: + /* Traverse attribute btree and invoke callback for all records */ + error = traverse_btree(hfsmp, kHFSAttributesFileID, TRAVERSE_BTREE_XATTR_CPROTECT, &(fsinfo_union->cprotect), &fsinfo_cprotect_count_callback); + break; +#endif + + default: + return ENOTSUP; + }; + + return error; +} + +/* + * This function provides information about total number of allocation blocks + * for each individual metadata file. 
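+ * The btree and allocation-file figures come straight from the ff_blocks
+ * of each cnode's data fork; the journal size is converted from bytes to
+ * allocation blocks with howmany(jnl_size, blockSize).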
+ */ +static errno_t +hfs_fsinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo) +{ + int lockflags = 0; + int ret_lockflags = 0; + + /* + * Getting number of allocation blocks for all metadata files + * should be a relatively quick operation, so we grab locks for all + * the btrees at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_SHARED_LOCK); + + /* Get information about all the btrees */ + fsinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks; + fsinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks; + fsinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks; + if (hfsmp->hfs_attribute_cp) + fsinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks; + else + fsinfo->attribute = 0; + + /* Done with btrees, give up the locks */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + + /* Get information about journal file */ + fsinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize); + + return 0; +} + +/* + * Helper function to count the number of valid extents in a file fork structure + */ +static uint32_t +hfs_count_extents_fp(struct filefork *ff) +{ + int i; + uint32_t count = 0; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (ff->ff_data.cf_extents[i].blockCount == 0) { + break; + } + count++; + } + return count; +} + + +/* + * This is a helper function that counts the total number of valid + * extents in all the overflow extent records for given fileID + * in overflow extents btree + */ +static errno_t +hfs_count_overflow_extents(struct hfsmount *hfsmp, uint32_t fileID, uint32_t *num_extents) +{ + int error; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + FSBufferDescriptor btdata; + HFSPlusExtentKey *extentKey; + HFSPlusExtentRecord extentData; + uint32_t extent_count = 0; + int i; + + fcb = VTOF(hfsmp->hfs_extents_vp); + iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + extentKey = (HFSPlusExtentKey *) &iterator->key; + extentKey->keyLength = kHFSPlusExtentKeyMaximumLength; + extentKey->forkType = kHFSDataForkType; + extentKey->fileID = fileID; + extentKey->startBlock = 0; + + btdata.bufferAddress = &extentData; + btdata.itemSize = sizeof(HFSPlusExtentRecord); + btdata.itemCount = 1; + + /* Search for overflow extent record */ + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + + /* + * We used startBlock of zero, so we will not find any records and errors + * are expected. It will also position the iterator just before the first + * overflow extent record for given fileID (if any). + */ + if (error && error != fsBTRecordNotFoundErr && error != fsBTEndOfIterationErr) + goto out; + error = 0; + + for (;;) { + + if (msleep(NULL, NULL, PINOD | PCATCH, + "hfs_fsinfo", NULL) == EINTR) { + error = EINTR; + break; + } + + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + if (error != 0) { + /* These are expected errors, so mask them */ + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + + /* If we encounter different fileID, stop the iteration */ + if (extentKey->fileID != fileID) { + break; + } + + if (extentKey->forkType != kHFSDataForkType) + break; + + /* This is our record of interest; only count the datafork extents. 
*/ + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extentData[i].blockCount == 0) { + break; + } + extent_count++; + } + } + +out: + hfs_free(iterator, sizeof(*iterator)); + + if (error == 0) { + *num_extents = extent_count; + } + return MacToVFSError(error); +} + +/* + * This function provides information about total number of extents (including + * extents from overflow extents btree, if any) for each individual metadata + * file. + */ +static errno_t +hfs_fsinfo_metadata_extents(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo) +{ + int error = 0; + int lockflags = 0; + int ret_lockflags = 0; + uint32_t overflow_count; + + /* + * Counting the number of extents for all metadata files should + * be a relatively quick operation, so we grab locks for all the + * btrees at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_SHARED_LOCK); + + /* Get number of extents for extents overflow btree */ + fsinfo->extents = hfs_count_extents_fp(hfsmp->hfs_extents_cp->c_datafork); + + /* Get number of extents for catalog btree */ + fsinfo->catalog = hfs_count_extents_fp(hfsmp->hfs_catalog_cp->c_datafork); + if (fsinfo->catalog >= kHFSPlusExtentDensity) { + error = hfs_count_overflow_extents(hfsmp, kHFSCatalogFileID, &overflow_count); + if (error) { + goto out; + } + fsinfo->catalog += overflow_count; + } + + /* Get number of extents for allocation file */ + fsinfo->allocation = hfs_count_extents_fp(hfsmp->hfs_allocation_cp->c_datafork); + if (fsinfo->allocation >= kHFSPlusExtentDensity) { + error = hfs_count_overflow_extents(hfsmp, kHFSAllocationFileID, &overflow_count); + if (error) { + goto out; + } + fsinfo->allocation += overflow_count; + } + + /* + * Get number of extents for attribute btree. + * hfs_attribute_cp might be NULL. + */ + if (hfsmp->hfs_attribute_cp) { + fsinfo->attribute = hfs_count_extents_fp(hfsmp->hfs_attribute_cp->c_datafork); + if (fsinfo->attribute >= kHFSPlusExtentDensity) { + error = hfs_count_overflow_extents(hfsmp, kHFSAttributesFileID, &overflow_count); + if (error) { + goto out; + } + fsinfo->attribute += overflow_count; + } + } + /* Journal always has one extent */ + fsinfo->journal = 1; +out: + hfs_systemfile_unlock(hfsmp, ret_lockflags); + return error; +} + +/* + * Helper function to calculate percentage i.e. X is what percent of Y? + */ +static inline uint32_t +hfs_percent(uint32_t X, uint32_t Y) +{ + return (X * 100ll) / Y; +} + +/* + * This function provides percentage of free nodes vs total nodes for each + * individual metadata btrees, i.e. for catalog, overflow extents and + * attributes btree. This information is not applicable for allocation + * file and journal file. 
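+ *
+ * Each value is hfs_percent(freeNodes, totalNodes) taken from the in-memory
+ * BTreeControlBlock. For example (illustrative numbers only), a catalog
+ * B-tree with 1000 total nodes of which 250 are free is reported as 25.
+ * If the volume has no attributes B-tree, that field is left at 0 (the
+ * output was zeroed by hfs_get_fsinfo()).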
+ */ +static errno_t +hfs_fsinfo_metadata_percentfree(struct hfsmount *hfsmp, struct hfs_fsinfo_metadata *fsinfo) +{ + int lockflags = 0; + int ret_lockflags = 0; + BTreeControlBlockPtr btreePtr; + uint32_t free_nodes, total_nodes; + + /* + * Getting total and used nodes for all metadata btrees should + * be a relatively quick operation, so we grab locks for all the + * btrees at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_SHARED_LOCK); + + /* Overflow extents btree */ + btreePtr = VTOF(hfsmp->hfs_extents_vp)->fcbBTCBPtr; + total_nodes = btreePtr->totalNodes; + free_nodes = btreePtr->freeNodes; + fsinfo->extents = hfs_percent(free_nodes, total_nodes); + + /* Catalog btree */ + btreePtr = VTOF(hfsmp->hfs_catalog_vp)->fcbBTCBPtr; + total_nodes = btreePtr->totalNodes; + free_nodes = btreePtr->freeNodes; + fsinfo->catalog = hfs_percent(free_nodes, total_nodes); + + /* Attributes btree */ + if (hfsmp->hfs_attribute_vp) { + btreePtr = VTOF(hfsmp->hfs_attribute_vp)->fcbBTCBPtr; + total_nodes = btreePtr->totalNodes; + free_nodes = btreePtr->freeNodes; + fsinfo->attribute = hfs_percent(free_nodes, total_nodes); + } + + hfs_systemfile_unlock(hfsmp, ret_lockflags); + return 0; +} + +/* + * Helper function to calculate log base 2 for given number + */ +static inline int +hfs_log2(uint64_t entry) +{ + return (63 - __builtin_clzll(entry|1)); +} + +/* + * Helper function to account for input entry into the data + * array based on its log base 2 value + */ +void hfs_fsinfo_data_add(struct hfs_fsinfo_data *fsinfo, uint64_t entry) +{ + /* + * From hfs_fsctl.h - + * + * hfs_fsinfo_data is generic data structure to aggregate information like sizes + * or counts in buckets of power of 2. Each bucket represents a range of values + * that is determined based on its index in the array. Specifically, buckets[i] + * represents values that are greater than or equal to 2^(i-1) and less than 2^i, + * except the last bucket which represents range greater than or equal to 2^(i-1) + * + * The current maximum number of buckets is 41, so we can represent range from + * 0 up to 1TB in increments of power of 2, and then a catch-all bucket of + * anything that is greater than or equal to 1TB. + * + * For example, + * bucket[0] -> greater than or equal to 0 and less than 1 + * bucket[1] -> greater than or equal to 1 and less than 2 + * bucket[10] -> greater than or equal to 2^(10-1) = 512 and less than 2^10 = 1024 + * bucket[20] -> greater than or equal to 2^(20-1) = 512KB and less than 2^20 = 1MB + * bucket[41] -> greater than or equal to 2^(41-1) = 1TB + */ + uint32_t bucket; + + if (entry) { + /* + * Calculate log base 2 value for the entry. + * Account for this value in the appropriate bucket. + * The last bucket is a catch-all bucket of + * anything that is greater than or equal to 1TB + */ + bucket = MIN(hfs_log2(entry) + 1, HFS_FSINFO_DATA_MAX_BUCKETS-1); + ++fsinfo->bucket[bucket]; + } else { + /* Entry is zero, so account it in 0th offset */ + fsinfo->bucket[0]++; + } +} + +/* + * Function to traverse all the records of a btree and then call caller-provided + * callback function for every record found. The type of btree is chosen based + * on the fileID provided by the caller. This fuction grabs the correct locks + * depending on the type of btree it will be traversing and flags provided + * by the caller. + * + * Note: It might drop and reacquire the locks during execution. 
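+ *
+ * For example, the HFS_FSINFO_FILE_SIZE request arrives here as
+ * traverse_btree(hfsmp, kHFSCatalogFileID, 0, &(fsinfo_union->data),
+ * &fsinfo_file_size_callback): the catalog lock is taken shared and the
+ * callback is handed every leaf record in turn. Passing the
+ * TRAVERSE_BTREE_EXTENTS flag additionally takes the extents lock, which
+ * lets a callback such as fsinfo_file_extent_count_callback consult the
+ * overflow extents B-tree while it runs.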
+ */ +static errno_t +traverse_btree(struct hfsmount *hfsmp, uint32_t btree_fileID, int flags, + void *fsinfo, int (*callback)(struct hfsmount *, HFSPlusKey *, HFSPlusRecord *, void *)) +{ + int error = 0; + int lockflags = 0; + int ret_lockflags = 0; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + struct FSBufferDescriptor btdata; + int btree_operation; + HFSPlusRecord record; + HFSPlusKey *key; + uint64_t start, timeout_abs; + + switch(btree_fileID) { + case kHFSExtentsFileID: + fcb = VTOF(hfsmp->hfs_extents_vp); + lockflags = SFL_EXTENTS; + break; + case kHFSCatalogFileID: + fcb = VTOF(hfsmp->hfs_catalog_vp); + lockflags = SFL_CATALOG; + break; + case kHFSAttributesFileID: + // Attributes file doesn’t exist, There are no records to iterate. + if (hfsmp->hfs_attribute_vp == NULL) + return error; + fcb = VTOF(hfsmp->hfs_attribute_vp); + lockflags = SFL_ATTRIBUTE; + break; + + default: + return EINVAL; + } + + iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* The key is initialized to zero because we are traversing entire btree */ + key = (HFSPlusKey *)&iterator->key; + + if (flags & TRAVERSE_BTREE_EXTENTS) { + lockflags |= SFL_EXTENTS; + } + + btdata.bufferAddress = &record; + btdata.itemSize = sizeof(HFSPlusRecord); + btdata.itemCount = 1; + + /* Lock btree for duration of traversal */ + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_SHARED_LOCK); + btree_operation = kBTreeFirstRecord; + + nanoseconds_to_absolutetime(HFS_FSINFO_MAX_LOCKHELD_TIME, &timeout_abs); + start = mach_absolute_time(); + + while (1) { + + if (msleep(NULL, NULL, PINOD | PCATCH, + "hfs_fsinfo", NULL) == EINTR) { + error = EINTR; + break; + } + + error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); + if (error != 0) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + /* Lookup next btree record on next call to BTIterateRecord() */ + btree_operation = kBTreeNextRecord; + + /* Call our callback function and stop iteration if there are any errors */ + error = callback(hfsmp, key, &record, fsinfo); + if (error) { + break; + } + + /* let someone else use the tree after we've processed over HFS_FSINFO_MAX_LOCKHELD_TIME */ + if ((mach_absolute_time() - start) >= timeout_abs) { + + /* release b-tree locks and let someone else get the lock */ + hfs_systemfile_unlock (hfsmp, ret_lockflags); + + /* add tsleep here to force context switch and fairness */ + tsleep((caddr_t)hfsmp, PRIBIO, "hfs_fsinfo", 1); + + /* + * re-acquire the locks in the same way that we wanted them originally. + * note: it is subtle but worth pointing out that in between the time that we + * released and now want to re-acquire these locks that the b-trees may have shifted + * slightly but significantly. For example, the catalog or other b-tree could have grown + * past 8 extents and now requires the extents lock to be held in order to be safely + * manipulated. We can't be sure of the state of the b-tree from where we last left off. + */ + + ret_lockflags = hfs_systemfile_lock (hfsmp, lockflags, HFS_SHARED_LOCK); + + /* + * It's highly likely that the search key we stashed away before dropping lock + * no longer points to an existing item. Iterator's IterateRecord is able to + * re-position itself and process the next record correctly. With lock dropped, + * there might be records missed for statistic gathering, which is ok. The + * point is to get aggregate values. 
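+ *
+ * This is also why the flags actually granted are kept in ret_lockflags
+ * and used for the matching unlock: after a re-acquisition the set of
+ * locks hfs_systemfile_lock() hands back may differ from the plain
+ * lockflags we asked for (for example if a b-tree has since grown past
+ * 8 extents).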
+ */ + + start = mach_absolute_time(); + + /* loop back around and get another record */ + } + } + + hfs_systemfile_unlock(hfsmp, ret_lockflags); + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(error); +} + +/* + * Callback function to get distribution of number of extents + * for all user files in given file system. Note that this only + * accounts for data fork, no resource fork. + */ +static errno_t +fsinfo_file_extent_count_callback(struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + int i; + int error = 0; + uint32_t num_extents = 0; + uint32_t num_overflow = 0; + uint32_t blockCount; + + if (record->file_record.recordType == kHFSPlusFileRecord) { + /* Count total number of extents for this file */ + for (i = 0; i < kHFSPlusExtentDensity; i++) { + blockCount = record->file_record.dataFork.extents[i].blockCount; + if (blockCount == 0) { + break; + } + num_extents++; + } + /* This file has overflow extent records, so search overflow btree */ + if (num_extents >= kHFSPlusExtentDensity) { + /* The caller also hold extents overflow btree lock */ + error = hfs_count_overflow_extents(hfsmp, record->file_record.fileID, &num_overflow); + if (error) { + goto out; + } + num_extents += num_overflow; + } + hfs_fsinfo_data_add(data, num_extents); + } +out: + return error; +} + +/* + * Callback function to get distribution of individual extent sizes + * (in bytes) for all user files in given file system from catalog + * btree only. Note that this only accounts for data fork, no resource + * fork. + */ +static errno_t fsinfo_file_extent_size_catalog_callback(struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + int i; + uint32_t blockCount; + uint64_t extent_size; + + if (record->file_record.recordType == kHFSPlusFileRecord) { + /* Traverse through all valid extents */ + for (i = 0; i < kHFSPlusExtentDensity; i++) { + blockCount = record->file_record.dataFork.extents[i].blockCount; + if (blockCount == 0) { + break; + } + extent_size = hfs_blk_to_bytes(blockCount, hfsmp->blockSize); + hfs_fsinfo_data_add(data, extent_size); + } + } + return 0; +} + +/* + * Callback function to get distribution of individual extent sizes + * (in bytes) for all user files in given file system from overflow + * extents btree only. Note that this only accounts for data fork, + * no resource fork. + */ +static errno_t fsinfo_file_extent_size_overflow_callback(struct hfsmount *hfsmp, + HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + int i; + uint32_t blockCount; + uint64_t extent_size; + + if (key->extent_key.fileID >= kHFSFirstUserCatalogNodeID) { + // Only count the data fork extents. + if (key->extent_key.forkType == kHFSDataForkType) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + blockCount = record->extent_record[i].blockCount; + if (blockCount == 0) { + break; + } + extent_size = hfs_blk_to_bytes(blockCount, hfsmp->blockSize); + hfs_fsinfo_data_add(data, extent_size); + } + } + } + return 0; +} + +/* + * Callback function to get distribution of file sizes (in bytes) + * for all user files in given file system. Note that this only + * accounts for data fork, no resource fork. 
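+ *
+ * Worked example (illustrative sizes): a data fork whose logicalSize is
+ * 3072 bytes increments bucket[12], since hfs_log2(3072) == 11 and
+ * MIN(11 + 1, HFS_FSINFO_DATA_MAX_BUCKETS - 1) == 12; per the bucket
+ * definition described above in hfs_fsinfo_data_add(), bucket[12] covers
+ * values greater than or equal to 2048 and less than 4096. A zero-length
+ * fork is counted in bucket[0].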
+ */ +static errno_t fsinfo_file_size_callback(__unused struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + if (record->file_record.recordType == kHFSPlusFileRecord) { + /* Record of interest, account for the size in the bucket */ + hfs_fsinfo_data_add(data, record->file_record.dataFork.logicalSize); + } + return 0; +} + +/* + * Callback function to get distribution of directory valence + * for all directories in the given file system. + */ +static errno_t fsinfo_dir_valence_callback(__unused struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + if (record->folder_record.recordType == kHFSPlusFolderRecord) { + hfs_fsinfo_data_add(data, record->folder_record.valence); + } + return 0; +} + +/* + * Callback function to get distribution of number of unicode + * characters in name for all files and directories for a given + * file system. + */ +static errno_t fsinfo_name_size_callback(__unused struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + struct hfs_fsinfo_name *fsinfo = (struct hfs_fsinfo_name *)data; + uint32_t length; + + if ((record->folder_record.recordType == kHFSPlusFolderThreadRecord) || + (record->folder_record.recordType == kHFSPlusFileThreadRecord)) { + length = record->thread_record.nodeName.length; + /* Make sure that the nodeName is bounded, otherwise return error */ + if (length > kHFSPlusMaxFileNameChars) { + return EIO; + } + + // sanity check for a name length of zero, which isn't valid on disk. + if (length == 0) + return EIO; + + /* Round it down to nearest multiple of 5 to match our buckets granularity */ + length = (length - 1)/ 5; + /* Account this value into our bucket */ + fsinfo->bucket[length]++; + } + return 0; +} + +/* + * Callback function to get distribution of size of all extended + * attributes for a given file system. + */ +static errno_t fsinfo_xattr_size_callback(__unused struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + if (record->attr_record.recordType == kHFSPlusAttrInlineData) { + /* Inline attribute */ + hfs_fsinfo_data_add(data, record->attr_record.attrData.attrSize); + } else if (record->attr_record.recordType == kHFSPlusAttrForkData) { + /* Larger attributes with extents information */ + hfs_fsinfo_data_add(data, record->attr_record.forkData.theFork.logicalSize); + } + return 0; +} + + +/* + * Callback function to get distribution of free space extents for a given file system. + */ +static void fsinfo_free_extents_callback(void *data, off_t free_extent_size) +{ + // Assume a minimum of 4 KB block size + hfs_fsinfo_data_add(data, free_extent_size / 4096); +} + +/* + * Function to get distribution of free space extents for a given file system. + */ +static errno_t hfs_fsinfo_free_extents(struct hfsmount *hfsmp, struct hfs_fsinfo_data *fsinfo) +{ + return hfs_find_free_extents(hfsmp, &fsinfo_free_extents_callback, fsinfo); +} + +/* + * Callback function to get distribution of symblock link sizes (in bytes) + * for all user files in given file system. Note that this only + * accounts for data fork, no resource fork. 
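+ *
+ * Only catalog file records whose BSD mode satisfies S_ISLNK() are counted;
+ * the value added to the buckets is the data fork's logicalSize, which for
+ * a symbolic link corresponds to the length of the stored link target
+ * (on HFS Plus the link target is kept in the data fork).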
+ */ +static errno_t fsinfo_symlink_size_callback(__unused struct hfsmount *hfsmp, + __unused HFSPlusKey *key, HFSPlusRecord *record, void *data) +{ + if (record->file_record.recordType == kHFSPlusFileRecord) { + /* Record of interest, account for the size in the bucket */ + if (S_ISLNK(record->file_record.bsdInfo.fileMode)) + hfs_fsinfo_data_add((struct hfs_fsinfo_data *)data, record->file_record.dataFork.logicalSize); + } + return 0; +} + +#if CONFIG_PROTECT +/* + * Callback function to get total number of files/directories + * for each content protection class + */ +static int fsinfo_cprotect_count_callback(struct hfsmount *hfsmp, HFSPlusKey *key, + HFSPlusRecord *record, void *data) +{ + struct hfs_fsinfo_cprotect *fsinfo = (struct hfs_fsinfo_cprotect *)data; + static const uint16_t cp_xattrname_utf16[] = CONTENT_PROTECTION_XATTR_NAME_CHARS; + /* + * NOTE: cp_xattrname_utf16_len is the number of UTF-16 code units in + * the EA name string. + */ + static const size_t cp_xattrname_utf16_len = sizeof(cp_xattrname_utf16)/2; + struct cp_xattr_v5 *xattr; + size_t xattr_len = sizeof(struct cp_xattr_v5); + struct cprotect cp_entry; + struct cprotect *cp_entryp = &cp_entry; + int error = 0; + + /* Content protect xattrs are inline attributes only, so skip all others */ + if (record->attr_record.recordType != kHFSPlusAttrInlineData) + return 0; + + /* We only look at content protection xattrs */ + if ((key->attr_key.attrNameLen != cp_xattrname_utf16_len) || + (bcmp(key->attr_key.attrName, cp_xattrname_utf16, 2 * cp_xattrname_utf16_len))) { + return 0; + } + + xattr = (struct cp_xattr_v5 *)((void *)(record->attr_record.attrData.attrData)); + error = cp_read_xattr_v5(hfsmp, xattr, xattr_len, (cprotect_t *)&cp_entryp, + CP_GET_XATTR_BASIC_INFO); + if (error) + return 0; + + /* No key present, skip this record */ + if (!ISSET(cp_entry.cp_flags, CP_HAS_A_KEY)) + return 0; + + /* Now account for the persistent class */ + switch (CP_CLASS(cp_entry.cp_pclass)) { + case PROTECTION_CLASS_A: + fsinfo->class_A++; + break; + case PROTECTION_CLASS_B: + fsinfo->class_B++; + break; + case PROTECTION_CLASS_C: + fsinfo->class_C++; + break; + case PROTECTION_CLASS_D: + fsinfo->class_D++; + break; + case PROTECTION_CLASS_E: + fsinfo->class_E++; + break; + case PROTECTION_CLASS_F: + fsinfo->class_F++; + break; + }; + + return 0; +} +#endif diff --git a/core/hfs_hotfiles.c b/core/hfs_hotfiles.c new file mode 100644 index 0000000..8d07a23 --- /dev/null +++ b/core/hfs_hotfiles.c @@ -0,0 +1,3929 @@ +/* + * Copyright (c) 2003-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_endian.h" +#include "hfs_format.h" +#include "hfs_mount.h" +#include "hfs_hotfiles.h" + +#include "BTreeScanner.h" + + +#define HFC_DEBUG 0 +#define HFC_VERBOSE 0 + + +/* + * Minimum post Tiger base time. + * Thu Mar 31 17:00:00 2005 + */ +#define HFC_MIN_BASE_TIME 0x424c8f00L + +/* + * Hot File List (runtime). + */ +typedef struct hotfileinfo { + u_int32_t hf_fileid; + u_int32_t hf_temperature; + u_int32_t hf_blocks; +} hotfileinfo_t; + +typedef struct hotfilelist { + size_t hfl_size; + u_int32_t hfl_magic; + u_int32_t hfl_version; + time_t hfl_duration; /* duration of sample period */ + int hfl_count; /* count of hot files recorded */ + int hfl_next; /* next file to move */ + int hfl_totalblocks; /* total hot file blocks */ + int hfl_reclaimblks; /* blocks to reclaim in HFV */ + u_int32_t hfl_spare[2]; + hotfileinfo_t hfl_hotfile[1]; /* array of hot files */ +} hotfilelist_t; + + +/* + * Hot File Entry (runtime). + */ +typedef struct hotfile_entry { + struct hotfile_entry *left; + struct hotfile_entry *right; + u_int32_t fileid; + u_int32_t temperature; + u_int32_t blocks; +} hotfile_entry_t; + + +// +// We cap the max temperature for non-system files to "MAX_NORMAL_TEMP" +// so that they will always have a lower temperature than system (aka +// "auto-cached") files. System files have MAX_NORMAL_TEMP added to +// their temperature which produces two bands of files (all non-system +// files will have a temp less than MAX_NORMAL_TEMP and all system +// files will have a temp greatern than MAX_NORMAL_TEMP). +// +// This puts non-system files on the left side of the hotfile btree +// (and we start evicting from the left-side of the tree). The idea is +// that we will evict non-system files more aggressively since their +// working set changes much more dynamically than system files (which +// are for the most part, static). +// +// NOTE: these values have to fit into a 32-bit int. We use a +// value of 1-billion which gives a pretty broad range +// and yet should not run afoul of any sign issues. +// +#define MAX_NORMAL_TEMP 1000000000 +#define HF_TEMP_RANGE MAX_NORMAL_TEMP + + +// +// These used to be defines of the hard coded values. But if +// we're on an cooperative fusion (CF) system we need to change +// the values (which happens in hfs_recording_init() +// +uint32_t hfc_default_file_count = 1000; +uint32_t hfc_default_duration = (3600 * 60); +uint32_t hfc_max_file_count = 5000; +uint64_t hfc_max_file_size = (10 * 1024 * 1024); + + +/* + * Hot File Recording Data (runtime). 
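+ *
+ * The entries[] array at the end of hotfile_data_t doubles as the entry
+ * pool: hfs_recording_start() chains entries[i-1].right to entries[i] to
+ * build the initial freelist, hf_getnewentry() hands entries out from that
+ * list, and hf_insert()/hf_delete() (below) link them into the in-memory
+ * tree rooted at rootentry.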
+ */ +typedef struct hotfile_data { + size_t size; + struct hfsmount *hfsmp; + long refcount; + u_int32_t activefiles; /* active number of hot files */ + u_int32_t threshold; + u_int32_t maxblocks; + hotfile_entry_t *rootentry; + hotfile_entry_t *freelist; + hotfile_entry_t *coldest; + hotfile_entry_t entries[]; +} hotfile_data_t; + +static int hfs_recording_start (struct hfsmount *); +static int hfs_recording_stop (struct hfsmount *); + +/* Hotfiles pinning routines */ +static int hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned); +static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned); +static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc); + +/* + * Hot File Data recording functions (in-memory binary tree). + */ +static int hf_insert (hotfile_data_t *, hotfile_entry_t *); +static void hf_delete (hotfile_data_t *, u_int32_t, u_int32_t); +static hotfile_entry_t * hf_coldest (hotfile_data_t *); +static hotfile_entry_t * hf_getnewentry (hotfile_data_t *); +static void hf_getsortedlist (hotfile_data_t *, hotfilelist_t *); + +#if HFC_DEBUG +static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t); +static void hf_maxdepth(hotfile_entry_t *, int, int *); +static void hf_printtree (hotfile_entry_t *); +#endif + +/* + * Hot File misc support functions. + */ +static int hotfiles_collect (struct hfsmount *); +static int hotfiles_age (struct hfsmount *); +static int hotfiles_adopt (struct hfsmount *); +static int hotfiles_evict (struct hfsmount *, vfs_context_t); +static int hotfiles_refine (struct hfsmount *); +static int hotextents(struct hfsmount *, HFSPlusExtentDescriptor *); +static int hfs_addhotfile_internal(struct vnode *); +static int hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp); + + +/* + * Hot File Cluster B-tree (on disk) functions. + */ +static int hfc_btree_create (struct hfsmount *, unsigned int, unsigned int); +static int hfc_btree_open (struct hfsmount *, struct vnode **); +static int hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs); +static int hfc_btree_close (struct hfsmount *, struct vnode *); +static int hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key); +static int hfc_btree_delete(struct hfsmount *hfsmp); +static int hfc_comparekeys (HotFileKey *, HotFileKey *); + + +char hfc_tag[] = "CLUSTERED HOT FILES B-TREE "; + + +/* + *======================================================================== + * HOT FILE INTERFACE ROUTINES + *======================================================================== + */ + +/* + * Start recording the hottest files on a file system. + * + * Requires that the hfc_mutex be held. 
+ */ +static int +hfs_recording_start(struct hfsmount *hfsmp) +{ + hotfile_data_t *hotdata; + struct timeval tv; + int maxentries; + size_t size; + int i; + int error; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || + (hfsmp->jnl == NULL) || + (hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) { + return (EPERM); + } + if (HFSTOVCB(hfsmp)->freeBlocks < (2 * (u_int32_t)hfsmp->hfs_hotfile_maxblks)) { + return (ENOSPC); + } + if (hfsmp->hfc_stage != HFC_IDLE) { + return (EBUSY); + } + hfsmp->hfc_stage = HFC_BUSY; + + if (hfsmp->hfc_recdata) { + hfs_free(hfsmp->hfc_recdata, hfsmp->hfc_recdata->size); + hfsmp->hfc_recdata = NULL; + } + if (hfsmp->hfc_filelist) { + hfs_free(hfsmp->hfc_filelist, hfsmp->hfc_filelist->hfl_size); + hfsmp->hfc_filelist = NULL; + } + + microtime(&tv); /* Times are base on GMT time. */ + + /* + * On first startup check for suspended recording. + */ + if (hfsmp->hfc_timebase == 0 && + hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) == 0) { + HotFilesInfo hotfileinfo; + + if ((BTGetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, + sizeof(hotfileinfo)) == 0) && + (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC) && + (SWAP_BE32 (hotfileinfo.timeleft) > 0) && + (SWAP_BE32 (hotfileinfo.timebase) > 0)) { + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks); + } + hfsmp->hfc_maxfiles = 0x7fffffff; + printf("hfs: %s: %s: hotfile freeblocks: %d, max: %d\n", hfsmp->vcbVN, __FUNCTION__, + hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks); + } else { + hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt); + } + hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase); + int timeleft = (int)SWAP_BE32(hotfileinfo.timeleft); + if (timeleft < 0 || timeleft > (int)(HFC_DEFAULT_DURATION*2)) { + // in case this field got botched, don't let it screw things up + // printf("hfs: hotfiles: bogus looking timeleft: %d\n", timeleft); + timeleft = HFC_DEFAULT_DURATION; + } + hfsmp->hfc_timeout = timeleft + tv.tv_sec ; + /* Fix up any bogus timebase values. */ + if (hfsmp->hfc_timebase < HFC_MIN_BASE_TIME) { + hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION; + } +#if HFC_VERBOSE + printf("hfs: Resume recording hot files on %s (%d secs left (%d); timeout %ld)\n", + hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft), timeleft, hfsmp->hfc_timeout - tv.tv_sec); +#endif + } else { + hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; + hfsmp->hfc_timebase = tv.tv_sec + 1; + hfsmp->hfc_timeout = hfsmp->hfc_timebase + HFC_DEFAULT_DURATION; + } + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } else { + struct cat_attr cattr; + u_int32_t cnid; + + /* + * Make sure a btree file exists. + */ + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL); + if ((cnid == 0) && + !S_ISREG(cattr.ca_mode) && + (error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT))) { + hfsmp->hfc_stage = HFC_IDLE; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); + } +#if HFC_VERBOSE + printf("hfs: begin recording hot files on %s (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); +#endif + hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; + hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION; + + /* Reset time base. 
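+	 *
+	 * When no time base exists yet it simply becomes "now + 1"; otherwise it
+	 * is only ever pulled forward, so it never sits more than
+	 * HFC_CUMULATIVE_CYCLES recording periods behind the new timeout:
+	 * hfc_timebase = MAX(hfc_timebase,
+	 *                    hfc_timeout - HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION)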
*/ + if (hfsmp->hfc_timebase == 0) { + hfsmp->hfc_timebase = tv.tv_sec + 1; + } else { + time_t cumulativebase; + + cumulativebase = hfsmp->hfc_timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION); + hfsmp->hfc_timebase = MAX(hfsmp->hfc_timebase, cumulativebase); + } + } + + if ((hfsmp->hfc_maxfiles == 0) || + (hfsmp->hfc_maxfiles > HFC_MAXIMUM_FILE_COUNT)) { + hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; + } + maxentries = hfsmp->hfc_maxfiles; + + size = sizeof(hotfile_data_t) + maxentries * sizeof(hotfile_entry_t); + hotdata = hfs_mallocz(size); + hotdata->size = size; + + for (i = 1; i < maxentries ; i++) + hotdata->entries[i-1].right = &hotdata->entries[i]; + + hotdata->freelist = &hotdata->entries[0]; + /* + * Establish minimum temperature and maximum file size. + */ + hotdata->threshold = HFC_MINIMUM_TEMPERATURE; + hotdata->maxblocks = HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize; + hotdata->hfsmp = hfsmp; + + hfsmp->hfc_recdata = hotdata; + hfsmp->hfc_stage = HFC_RECORDING; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (0); +} + +/* + * Stop recording the hotest files on a file system. + * + * Requires that the hfc_mutex be held. + */ +static int +hfs_recording_stop(struct hfsmount *hfsmp) +{ + hotfile_data_t *hotdata; + hotfilelist_t *listp; + struct timeval tv; + size_t size; + enum hfc_stage newstage = HFC_IDLE; + int error; + + if (hfsmp->hfc_stage != HFC_RECORDING) + return (EPERM); + + hfsmp->hfc_stage = HFC_BUSY; + + hotfiles_collect(hfsmp); + + + /* + * Convert hot file data into a simple file id list.... + * + * then dump the sample data + */ +#if HFC_VERBOSE + printf("hfs: end of hot file recording on %s\n", hfsmp->vcbVN); +#endif + hotdata = hfsmp->hfc_recdata; + if (hotdata == NULL) + return (0); + hfsmp->hfc_recdata = NULL; + hfsmp->hfc_stage = HFC_EVALUATION; + wakeup((caddr_t)&hfsmp->hfc_stage); + +#if HFC_VERBOSE + printf("hfs: curentries: %d\n", hotdata->activefiles); +#endif + /* + * If no hot files recorded then we're done. + */ + if (hotdata->rootentry == NULL) { + error = 0; + goto out; + } + + /* Open the B-tree file for writing... */ + if (hfsmp->hfc_filevp) + panic("hfs_recording_stop: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); + + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (error) { + goto out; + } + + /* + * Age the previous set of clustered hot files. + */ + error = hotfiles_age(hfsmp); + if (error) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + goto out; + } + + /* + * Create a sorted list of hotest files. + */ + size = sizeof(hotfilelist_t); + size += sizeof(hotfileinfo_t) * (hotdata->activefiles - 1); + listp = hfs_mallocz(size); + listp->hfl_size = size; + + hf_getsortedlist(hotdata, listp); /* NOTE: destroys hot file tree! */ + microtime(&tv); + listp->hfl_duration = tv.tv_sec - hfsmp->hfc_timebase; + hfs_assert(!hfsmp->hfc_filelist); + hfsmp->hfc_filelist = listp; + + /* + * Account for duplicates. + */ + error = hotfiles_refine(hfsmp); + if (error) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + goto out; + } + + /* + * Compute the amount of space to reclaim... 
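+	 *
+	 * i.e. hfl_reclaimblks = MIN(hfl_totalblocks, hfs_hotfile_maxblks) -
+	 * hfs_hotfile_freeblks. Illustrative numbers: if the sorted list wants
+	 * 5000 blocks, the zone holds 8000 and 3000 of them are still free,
+	 * then 2000 blocks have to be evicted first and the next stage is
+	 * HFC_EVICTION; if nothing needs reclaiming we move straight to
+	 * HFC_ADOPTION.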
+ */ + if (listp->hfl_totalblocks > hfs_hotfile_cur_freeblks(hfsmp)) { + listp->hfl_reclaimblks = + MIN(listp->hfl_totalblocks, hfsmp->hfs_hotfile_maxblks) - + hfsmp->hfs_hotfile_freeblks; +#if HFC_VERBOSE + printf("hfs_recording_stop: need to reclaim %d blocks\n", listp->hfl_reclaimblks); +#endif + if (listp->hfl_reclaimblks) + newstage = HFC_EVICTION; + else + newstage = HFC_ADOPTION; + } else { + newstage = HFC_ADOPTION; + } + + if (newstage == HFC_ADOPTION && listp->hfl_totalblocks == 0) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + newstage = HFC_IDLE; + } +out: +#if HFC_VERBOSE + if (newstage == HFC_EVICTION) + printf("hfs: evicting coldest files\n"); + else if (newstage == HFC_ADOPTION) + printf("hfs: adopting hotest files\n"); +#endif + hfs_free(hotdata, hotdata->size); + + hfsmp->hfc_stage = newstage; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); +} + +static void +save_btree_user_info(struct hfsmount *hfsmp) +{ + HotFilesInfo hotfileinfo; + struct timeval tv; + + microtime(&tv); + hotfileinfo.magic = SWAP_BE32 (HFC_MAGIC); + hotfileinfo.version = SWAP_BE32 (HFC_VERSION); + hotfileinfo.duration = SWAP_BE32 (HFC_DEFAULT_DURATION); + hotfileinfo.timebase = SWAP_BE32 (hfsmp->hfc_timebase); + hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec); + hotfileinfo.threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE); + hotfileinfo.maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hotfileinfo.usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfs_hotfile_cur_freeblks(hfsmp)); +#if HFC_VERBOSE + printf("hfs: %s: saving usedblocks = %d (timeleft: %d; timeout %ld)\n", hfsmp->vcbVN, (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks), + SWAP_BE32(hotfileinfo.timeleft), hfsmp->hfc_timeout); +#endif + } else { + hotfileinfo.maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); + } + strlcpy((char *)hotfileinfo.tag, hfc_tag, sizeof hotfileinfo.tag); + (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo)); +} + +/* + * Suspend recording the hotest files on a file system. + */ +int +hfs_recording_suspend(struct hfsmount *hfsmp) +{ + hotfile_data_t *hotdata = NULL; + int error; + + if (hfsmp->hfc_stage == HFC_DISABLED) + return (0); + + lck_mtx_lock(&hfsmp->hfc_mutex); + + /* + * XXX NOTE + * A suspend can occur during eval/evict/adopt stage. + * In that case we would need to write out info and + * flush our HFBT vnode. Currently we just bail. 
+ */ + + hotdata = hfsmp->hfc_recdata; + if (hotdata == NULL || hfsmp->hfc_stage != HFC_RECORDING) { + error = 0; + goto out; + } + hfsmp->hfc_stage = HFC_BUSY; + +#if HFC_VERBOSE + printf("hfs: suspend hot file recording on %s\n", hfsmp->vcbVN); +#endif + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (error) { + printf("hfs_recording_suspend: err %d opening btree\n", error); + goto out; + } + + if (hfs_start_transaction(hfsmp) != 0) { + goto out; + } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + goto end_transaction; + } + + save_btree_user_info(hfsmp); + + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + +end_transaction: + hfs_end_transaction(hfsmp); + +out: + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + if (hotdata) { + hfs_free(hotdata, hotdata->size); + hfsmp->hfc_recdata = NULL; + } + hfsmp->hfc_stage = HFC_DISABLED; + wakeup((caddr_t)&hfsmp->hfc_stage); + + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (error); +} + + +static void +reset_file_ids(struct hfsmount *hfsmp, uint32_t *fileid_table, int num_ids) +{ + int i, error; + + for(i=0; i < num_ids; i++) { + struct vnode *vp; + + error = hfs_vget(hfsmp, fileid_table[i], &vp, 0, 0); + if (error) { + if (error == ENOENT) { + error = 0; + continue; /* stale entry, go to next */ + } + continue; + } + + // hfs_vget returns a locked cnode so no need to lock here + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, NULL); + } + + /* + * The updates to the catalog must be journaled + */ + hfs_start_transaction(hfsmp); + + // + // turn off _all_ the hotfile related bits since we're resetting state + // + if (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) { + vnode_clearfastdevicecandidate(vp); + } + + VTOC(vp)->c_attr.ca_recflags &= ~(kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask); + VTOC(vp)->c_flag |= C_MODIFIED; + + hfs_update(vp, 0); + + hfs_end_transaction(hfsmp); + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + } +} + +static int +flag_hotfile(struct hfsmount *hfsmp, const char *filename) +{ + struct vnode *dvp = NULL, *fvp = NULL; + vfs_context_t ctx = vfs_context_kernel(); + int error=0; + size_t fname_len; + const char *orig_fname = filename; + + if (filename == NULL) { + return EINVAL; + } + + fname_len = strlen(filename); // do NOT include the trailing '\0' so that we break out of the loop below + + error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx); + if (error) { + return (error); + } + + /* At this point, 'dvp' must be considered iocounted */ + const char *ptr; + ptr = filename; + + while (ptr < (orig_fname + fname_len - 1)) { + for(; ptr < (orig_fname + fname_len) && *ptr && *ptr != '/'; ptr++) { + /* just keep advancing till we reach the end of the string or a slash */ + } + + struct componentname cname = { + .cn_nameiop = LOOKUP, + .cn_flags = ISLASTCN, + .cn_pnbuf = __DECONST(char *, orig_fname), + .cn_nameptr = __DECONST(char *, filename), + .cn_pnlen = fname_len, + .cn_namelen = ptr - filename + }; + + struct vnop_lookup_args ap = { + .a_dvp = dvp, + .a_vpp = &fvp, + .a_cnp = &cname, + .a_context = ctx + }; + + error = hfs_vnop_lookup(&ap); + if (error) { + /* + * If 'dvp' is non-NULL, then it has an iocount. Make sure to release it + * before bailing out. VNOP_LOOKUP could legitimately return ENOENT + * if the item didn't exist or if we raced with a delete. 
+ */ + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + return error; + } + + if (ptr < orig_fname + fname_len - 1) { + // + // we've got a multi-part pathname so drop the ref on the dir, + // make dvp become what we just looked up, and advance over + // the slash character in the pathname to get to the next part + // of the component + // + vnode_put(dvp); + dvp = fvp; + fvp = NULL; + + filename = ++ptr; // skip the slash character + } + } + + if (fvp == NULL) { + error = ENOENT; + goto out; + } + + struct cnode *cp = VTOC(fvp); + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) { + goto out; + } + + hfs_start_transaction(hfsmp); + + cp->c_attr.ca_recflags |= (kHFSFastDevCandidateMask|kHFSAutoCandidateMask); + cp->c_flag |= C_MODIFIED; + + hfs_update(fvp, 0); + + hfs_end_transaction(hfsmp); + + hfs_unlock(cp); + //printf("hfs: flagged /%s with the fast-dev-candidate|auto-candidate flags\n", filename); + + +out: + if (fvp) { + vnode_put(fvp); + fvp = NULL; + } + + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + return error; +} + + +static void +hfs_setup_default_cf_hotfiles(struct hfsmount *hfsmp) +{ + const char *system_default_hotfiles[] = { + "usr", + "System", + "Applications", + "private/var/db/dyld" + }; + int i; + + for(i=0; i < (int)(sizeof(system_default_hotfiles)/sizeof(char *)); i++) { + flag_hotfile(hfsmp, system_default_hotfiles[i]); + } +} + + +#define NUM_FILE_RESET_IDS 4096 // so we allocate 16k to hold file-ids + +static void +hfs_hotfile_reset(struct hfsmount *hfsmp) +{ + CatalogKey * keyp; + CatalogRecord * datap; + u_int32_t dataSize; + BTScanState scanstate; + BTreeIterator * iterator = NULL; + FSBufferDescriptor record; + u_int32_t data; + u_int32_t cnid; + int error = 0; + uint32_t *fileids=NULL; + int cur_id_index = 0; + + int cleared = 0; /* debug variables */ + int filecount = 0; + int dircount = 0; + +#if HFC_VERBOSE + printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__); +#endif + + iterator = hfs_mallocz(sizeof(*iterator)); + + fileids = hfs_malloc(NUM_FILE_RESET_IDS * sizeof(uint32_t)); + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + + /* + * Get ready to scan the Catalog file. + */ + error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, + kCatSearchBufferSize, &scanstate); + if (error) { + printf("hfs_hotfile_reset: err %d BTScanInit\n", error); + goto out; + } + + /* + * Visit all the catalog btree leaf records, clearing any that have the + * HotFileCached bit set. + */ + for (;;) { + error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize); + if (error) { + if (error == btNotFound) + error = 0; + else + printf("hfs_hotfile_reset: err %d BTScanNext\n", error); + break; + } + + if (datap->recordType == kHFSPlusFolderRecord && (dataSize == sizeof(HFSPlusCatalogFolder))) { + HFSPlusCatalogFolder *dirp = (HFSPlusCatalogFolder *)datap; + + dircount++; + + if ((dirp->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) { + continue; + } + + cnid = dirp->folderID; + } else if ((datap->recordType == kHFSPlusFileRecord) && (dataSize == sizeof(HFSPlusCatalogFile))) { + HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)datap; + + filecount++; + + /* + * If the file doesn't have any of the HotFileCached bits set, ignore it. 
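+	 *
+	 * "HotFileCached bits" means any of kHFSFastDevPinnedMask,
+	 * kHFSDoNotFastDevPinMask, kHFSFastDevCandidateMask or
+	 * kHFSAutoCandidateMask. Matching cnids are only collected into
+	 * fileids[] here (NUM_FILE_RESET_IDS at a time), because the catalog
+	 * must not be modified while BTScanNextRecord() is walking it;
+	 * reset_file_ids() clears the flags afterwards.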
+ */ + if ((filep->flags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask|kHFSFastDevCandidateMask|kHFSAutoCandidateMask)) == 0) { + continue; + } + + cnid = filep->fileID; + } else { + continue; + } + + /* Skip over journal files. */ + if (cnid == hfsmp->hfs_jnlfileid || cnid == hfsmp->hfs_jnlinfoblkid) { + continue; + } + + // + // Just record the cnid of the file for now. We will modify it separately + // because we can't modify the catalog while we're scanning it. + // + fileids[cur_id_index++] = cnid; + if (cur_id_index >= NUM_FILE_RESET_IDS) { + // + // We're over the limit of file-ids so we have to terminate this + // scan, go modify all the catalog records, then restart the scan. + // This is required because it's not permissible to modify the + // catalog while scanning it. + // + (void) BTScanTerminate(&scanstate, &data, &data, &data); + + reset_file_ids(hfsmp, fileids, cur_id_index); + cleared += cur_id_index; + cur_id_index = 0; + + // restart the scan + error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, + kCatSearchBufferSize, &scanstate); + if (error) { + printf("hfs_hotfile_reset: err %d BTScanInit\n", error); + goto out; + } + continue; + } + } + + if (cur_id_index) { + reset_file_ids(hfsmp, fileids, cur_id_index); + cleared += cur_id_index; + cur_id_index = 0; + } + + printf("hfs: cleared HotFileCache related bits on %d files out of %d (dircount %d)\n", cleared, filecount, dircount); + + (void) BTScanTerminate(&scanstate, &data, &data, &data); + +out: + hfs_free(fileids, NUM_FILE_RESET_IDS * sizeof(uint32_t)); + hfs_free(iterator, sizeof(*iterator)); + + // + // If the hotfile btree exists, delete it. We need to open + // it to be able to delete it because we need the hfc_filevp + // for deletion. + // + error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1); + if (!error) { + printf("hfs: hotfile_reset: deleting existing hotfile btree\n"); + hfc_btree_delete(hfsmp); + } + + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + hfsmp->hfs_hotfile_blk_adjust = 0; + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks; +} + + +// +// This should ONLY be called by hfs_recording_init() and the special fsctl. +// +// We assume that the hotfile btree is already opened. +// +static int +hfs_hotfile_repin_files(struct hfsmount *hfsmp) +{ + BTreeIterator * iterator = NULL; + HotFileKey * key; + filefork_t * filefork; + int error = 0; + int bt_op; + enum hfc_stage stage; + uint32_t pinned_blocks; + uint32_t num_files=0, nrsrc=0; + uint32_t total_pinned=0; + + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || !hfsmp->hfc_filevp) { + // + // this is only meaningful if we're pinning hotfiles + // (as opposed to the regular form of hotfiles that + // get relocated to the hotfile zone) + // + return 0; + } + +#if HFC_VERBOSE + printf("hfs: %s: %s\n", hfsmp->vcbVN, __FUNCTION__); +#endif + + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + return (EPERM); + } + + + iterator = hfs_mallocz(sizeof(*iterator)); + + stage = hfsmp->hfc_stage; + hfsmp->hfc_stage = HFC_BUSY; + + bt_op = kBTreeFirstRecord; + + key = (HotFileKey*) &iterator->key; + + filefork = VTOF(hfsmp->hfc_filevp); + int lockflags; + + while (1) { + + lockflags = 0; + /* + * Obtain the first record (ie the coldest one). 
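+	 *
+	 * Records come back in ascending key order, i.e. coldest temperature
+	 * first. Once a record whose temperature field is HFC_LOOKUPTAG shows
+	 * up, only the fileID-lookup (thread) records are left in the tree, so
+	 * the loop below treats that as the end of the hot-file records.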
+ */ + if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) { + // no more records + error = 0; + break; + } + if (key->keyLength != HFC_KEYLENGTH) { + // printf("hfs: hotfiles_repin_files: invalid key length %d\n", key->keyLength); + error = EFTYPE; + break; + } + if (key->temperature == HFC_LOOKUPTAG) { + // ran into thread records in the hotfile btree + error = 0; + break; + } + + // + // Just lookup the records in the catalog and pin the direct + // mapped extents. Faster than instantiating full vnodes + // (and thereby thrashing the system vnode cache). + // + struct cat_desc fdesc; + struct cat_attr attr; + struct cat_fork fork; + uint8_t forktype = 0; + + lockflags = hfs_systemfile_lock(hfsmp, (SFL_CATALOG | SFL_EXTENTS), HFS_SHARED_LOCK); + /* + * Snoop the cnode hash to find out if the item we want is in-core already. + * + * We largely expect this function to fail (the items we want are probably not in the hash). + * we use the special variant which bails out as soon as it finds a vnode (even if it is + * marked as open-unlinked or actually removed on-disk. If we find a vnode, then we + * release the systemfile locks and go through the pin-vnode path instead. + */ + if (hfs_chash_snoop (hfsmp, key->fileID, 1, NULL, NULL) == 0) { + pinned_blocks = 0; + + /* unlock immediately and go through the in-core path */ + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + error = hfs_getvnode_and_pin (hfsmp, key->fileID, &pinned_blocks); + if (error) { + /* if ENOENT, then it was deleted in the catalog. Remove from our hotfiles tracking */ + if (error == ENOENT) { + hfc_btree_delete_record(hfsmp, iterator, key); + } + /* other errors, just ignore and move on with life */ + } + else { //!error + total_pinned += pinned_blocks; + num_files++; + } + + goto next; + } + + /* If we get here, we're still holding the systemfile locks */ + error = cat_idlookup(hfsmp, key->fileID, 1, 0, &fdesc, &attr, &fork); + if (error) { + // + // this file system could have been mounted while booted from a + // different partition and thus the hotfile btree would not have + // been maintained. thus a file that was hotfile cached could + // have been deleted while booted from a different partition which + // means we need to delete it from the hotfile btree. + // + // block accounting is taken care of at the end: we re-assign + // hfsmp->hfs_hotfile_freeblks based on how many blocks we actually + // pinned. + // + hfc_btree_delete_record(hfsmp, iterator, key); + + goto next; + } + + if (fork.cf_size == 0) { + // hmmm, the data is probably in the resource fork (aka a compressed file) + error = cat_idlookup(hfsmp, key->fileID, 1, 1, &fdesc, &attr, &fork); + if (error) { + hfc_btree_delete_record(hfsmp, iterator, key); + goto next; + } + forktype = 0xff; + nrsrc++; + } + + pinned_blocks = 0; + + /* Can't release the catalog /extents lock yet, we may need to go find the overflow blocks */ + error = hfs_pin_extent_record (hfsmp, fork.cf_extents, &pinned_blocks); + if (error) { + goto next; //skip to next + } + /* add in the blocks from the inline 8 */ + total_pinned += pinned_blocks; + pinned_blocks = 0; + + /* Could this file have overflow extents? 
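+	 *
+	 * The test below is the usual HFS+ check: if the last of the eight
+	 * inline extent slots (index kHFSPlusExtentDensity - 1) has a nonzero
+	 * startBlock, the fork may continue in the extents overflow B-tree, so
+	 * those extents have to be pinned as well.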
*/ + if (fork.cf_extents[kHFSPlusExtentDensity-1].startBlock) { + /* better pin them, too */ + error = hfs_pin_overflow_extents (hfsmp, key->fileID, forktype, &pinned_blocks); + if (error) { + /* If we fail to pin all of the overflow extents, then just skip to the next file */ + goto next; + } + } + + num_files++; + if (pinned_blocks) { + /* now add in any overflow also */ + total_pinned += pinned_blocks; + } + + next: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + } + bt_op = kBTreeNextRecord; + + } /* end while */ + +#if HFC_VERBOSE + printf("hfs: hotfiles_repin_files: re-pinned %d files (nrsrc %d, total pinned %d blks; freeblock %d, maxblocks %d, calculated free: %d)\n", + num_files, nrsrc, total_pinned, hfsmp->hfs_hotfile_freeblks, hfsmp->hfs_hotfile_maxblks, + hfsmp->hfs_hotfile_maxblks - total_pinned); +#endif + // + // make sure this is accurate based on how many blocks we actually pinned + // + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - total_pinned; + + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + hfs_free(iterator, sizeof(*iterator)); + hfsmp->hfc_stage = stage; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); +} + +void +hfs_repin_hotfiles(struct hfsmount *hfsmp) +{ + int error, need_close; + + lck_mtx_lock(&hfsmp->hfc_mutex); + + if (hfsmp->hfc_filevp == NULL) { + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (!error) { + need_close = 1; + } else { + printf("hfs: failed to open the btree err=%d. Unable to re-pin hotfiles.\n", error); + lck_mtx_unlock(&hfsmp->hfc_mutex); + return; + } + } else { + need_close = 0; + } + + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL); + + hfs_hotfile_repin_files(hfsmp); + + if (need_close) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + lck_mtx_unlock(&hfsmp->hfc_mutex); +} + +/* + * For a given file ID, find and pin all of its overflow extents to the underlying CS + * device. Assumes that the extents overflow b-tree is locked for the duration of this call. + * + * Emit the number of blocks pinned in output argument 'pinned' + * + * Return success or failure (errno) in return value. + * + */ +int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, + uint8_t forktype, uint32_t *pinned) { + + struct BTreeIterator *ext_iter = NULL; + ExtentKey *ext_key_ptr = NULL; + ExtentRecord ext_data; + FSBufferDescriptor btRecord; + uint16_t btRecordSize; + int error = 0; + + uint32_t pinned_blocks = 0; + + + ext_iter = hfs_mallocz(sizeof (*ext_iter)); + + BTInvalidateHint (ext_iter); + ext_key_ptr = (ExtentKey*)&ext_iter->key; + btRecord.bufferAddress = &ext_data; + btRecord.itemCount = 1; + + /* + * This is like when you delete a file; we don't actually need most of the search machinery because + * we are going to need all of the extent records that belong to this file (for a given fork type), + * so we might as well use a straight-up iterator. + * + * Position the B-Tree iterator at the first record with this file ID + */ + btRecord.itemSize = sizeof (HFSPlusExtentRecord); + ext_key_ptr->hfsPlus.keyLength = kHFSPlusExtentKeyMaximumLength; + ext_key_ptr->hfsPlus.forkType = forktype; + ext_key_ptr->hfsPlus.pad = 0; + ext_key_ptr->hfsPlus.fileID = fileid; + ext_key_ptr->hfsPlus.startBlock = 0; + + error = BTSearchRecord (VTOF(hfsmp->hfs_extents_vp), ext_iter, &btRecord, &btRecordSize, ext_iter); + if (error == btNotFound) { + /* empty b-tree, so that's ok. we'll fall out during error check below. 
*/ + error = 0; + } + + while (1) { + uint32_t found_fileid; + uint32_t pblocks; + + error = BTIterateRecord (VTOF(hfsmp->hfs_extents_vp), kBTreeNextRecord, ext_iter, &btRecord, &btRecordSize); + if (error) { + /* swallow it if it's btNotFound, otherwise just bail out */ + if (error == btNotFound) + error = 0; + break; + } + + found_fileid = ext_key_ptr->hfsPlus.fileID; + /* + * We only do one fork type at a time. So if either the fork-type doesn't + * match what we are looking for (resource or data), OR the file id doesn't match + * which indicates that there's nothing more with this file ID as the key, then bail out + */ + if ((found_fileid != fileid) || (ext_key_ptr->hfsPlus.forkType != forktype)) { + error = 0; + break; + } + + /* Otherwise, we now have an extent record. Process and pin all of the file extents. */ + pblocks = 0; + error = hfs_pin_extent_record (hfsmp, ext_data.hfsPlus, &pblocks); + + if (error) { + break; + } + pinned_blocks += pblocks; + + /* if 8th extent is empty, then bail out */ + if (ext_data.hfsPlus[kHFSPlusExtentDensity-1].startBlock == 0) { + error = 0; + break; + } + + } // end extent-getting loop + + /* dump the iterator */ + hfs_free(ext_iter, sizeof(*ext_iter)); + + if (error == 0) { + /* + * In the event that the file has no overflow extents, pinned_blocks + * will never be updated, so we'll properly export 0 pinned blocks to caller + */ + *pinned = pinned_blocks; + } + + return error; + +} + + +static int +hfs_getvnode_and_pin (struct hfsmount *hfsmp, uint32_t fileid, uint32_t *pinned) { + struct vnode *vp; + int error = 0; + *pinned = 0; + uint32_t pblocks; + + /* + * Acquire the vnode for this file. This returns a locked cnode on success + */ + error = hfs_vget(hfsmp, fileid, &vp, 0, 0); + if (error) { + /* It's possible the file was open-unlinked. In this case, we'll get ENOENT back. */ + return error; + } + + /* + * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck + * here. We do not want to move them. + */ + if (!vnode_isreg(vp)) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + return EPERM; + } + + if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + return EINVAL; + } + + error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pblocks); + if (error == 0) { + *pinned = pblocks; + } + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + + return error; + +} + +/* + * Pins an HFS Extent record to the underlying CoreStorage. Assumes that Catalog & Extents overflow + * B-trees are held locked, as needed. + * + * Returns the number of blocks pinned in the output argument 'pinned' + * + * Returns error status (0 || errno) in return value. + */ +static int hfs_pin_extent_record (struct hfsmount *hfsmp, HFSPlusExtentRecord extents, uint32_t *pinned) { + uint32_t pb = 0; + int i; + int error = 0; + + if (pinned == NULL) { + return EINVAL; + } + *pinned = 0; + + + + /* iterate through the extents */ + for ( i = 0; i < kHFSPlusExtentDensity; i++) { + if (extents[i].startBlock == 0) { + break; + } + + error = hfs_pin_block_range (hfsmp, HFS_PIN_IT, extents[i].startBlock, + extents[i].blockCount); + + if (error) { + break; + } + pb += extents[i].blockCount; + } + + *pinned = pb; + + return error; +} + +/* + * Consume an HFS Plus on-disk catalog record and pin its blocks + * to the underlying CS devnode. + * + * NOTE: This is an important distinction! 
+ * This function takes in an HFSPlusCatalogFile* which is the actual + * 200-some-odd-byte on-disk representation in the Catalog B-Tree (not + * one of the run-time structs that we normally use. + * + * This assumes that the catalog and extents-overflow btrees + * are locked, at least in shared mode + */ +static int hfs_pin_catalog_rec (struct hfsmount *hfsmp, HFSPlusCatalogFile *cfp, int rsrc) { + uint32_t pinned_blocks = 0; + HFSPlusForkData *forkdata; + int error = 0; + uint8_t forktype = 0; + + if (rsrc) { + forkdata = &cfp->resourceFork; + forktype = 0xff; + } + else { + forkdata = &cfp->dataFork; + } + + uint32_t pblocks = 0; + + /* iterate through the inline extents */ + error = hfs_pin_extent_record (hfsmp, forkdata->extents, &pblocks); + if (error) { + return error; + } + + pinned_blocks += pblocks; + pblocks = 0; + + /* it may have overflow extents */ + if (forkdata->extents[kHFSPlusExtentDensity-1].startBlock != 0) { + error = hfs_pin_overflow_extents (hfsmp, cfp->fileID, forktype, &pblocks); + } + pinned_blocks += pblocks; + + hfsmp->hfs_hotfile_freeblks -= pinned_blocks; + + return error; +} + + +/* + * + */ +int +hfs_recording_init(struct hfsmount *hfsmp) +{ + CatalogKey * keyp; + CatalogRecord * datap; + u_int32_t dataSize; + HFSPlusCatalogFile *filep; + BTScanState scanstate; + BTreeIterator * iterator = NULL; + FSBufferDescriptor record; + HotFileKey * key; + filefork_t * filefork; + u_int32_t data; + struct cat_attr cattr; + u_int32_t cnid; + int error = 0; + long starting_temp; + + int started_tr = 0; + int started_scan = 0; + + int inserted = 0; /* debug variables */ + int filecount = 0; + int uncacheable = 0; + + /* + * For now, only the boot volume is supported. + */ + if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) { + hfsmp->hfc_stage = HFC_DISABLED; + return (EPERM); + } + + /* We grab the HFC mutex even though we're not fully mounted yet, just for orderliness */ + lck_mtx_lock (&hfsmp->hfc_mutex); + + /* + * Tracking of hot files requires up-to-date access times. + * So if access time updates are disabled, then we disable + * hot files, too. + */ + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_NOATIME) { + hfsmp->hfc_stage = HFC_DISABLED; + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EPERM; + } + + // + // Check if we've been asked to suspend operation + // + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-suspend", &cattr, NULL); + if (cnid != 0) { + printf("hfs: %s: %s: hotfiles explicitly disabled! remove /.hotfiles-suspend to re-enable\n", hfsmp->vcbVN, __FUNCTION__); + hfsmp->hfc_stage = HFC_DISABLED; + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EPERM; + } + + // + // Check if we've been asked to reset our state. + // + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, ".hotfile-reset", &cattr, NULL); + if (cnid != 0) { + hfs_hotfile_reset(hfsmp); + } + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // Cooperative Fusion (CF) systems use different constants + // than traditional hotfile systems. These were picked after a bit of + // experimentation - we can cache many more files on the + // ssd in an CF system and we can do so more rapidly + // so bump the limits considerably (and turn down the + // duration so that it doesn't take weeks to adopt all + // the files). + // + hfc_default_file_count = 20000; + hfc_default_duration = 300; // 5min + hfc_max_file_count = 50000; + hfc_max_file_size = (512ULL * 1024ULL * 1024ULL); + } + + /* + * If the Hot File btree exists then metadata zone is ready. 
+ */ + cnid = GetFileInfo(HFSTOVCB(hfsmp), kRootDirID, HFC_FILENAME, &cattr, NULL); + if (cnid != 0 && S_ISREG(cattr.ca_mode)) { + int recreate = 0; + + if (hfsmp->hfc_stage == HFC_DISABLED) + hfsmp->hfc_stage = HFC_IDLE; + hfsmp->hfs_hotfile_freeblks = 0; + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && cattr.ca_blocks > 0) { + // + // make sure the hotfile btree is pinned + // + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (!error) { + /* XXX: must fix hfs_pin_vnode too */ + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL); + + } else { + printf("hfs: failed to open the btree err=%d. Recreating hotfile btree.\n", error); + recreate = 1; + } + + hfs_hotfile_repin_files(hfsmp); + + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + } else if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // hmmm, the hotfile btree is zero bytes long? how odd. let's recreate it. + printf("hfs: hotfile btree is zero bytes long?! recreating it.\n"); + recreate = 1; + } + + if (!recreate) { + /* don't forget to unlock the mutex */ + lck_mtx_unlock (&hfsmp->hfc_mutex); + return (0); + } else { + // + // open the hotfile btree file ignoring errors because + // we need the vnode pointer for hfc_btree_delete() to + // be able to do its work + // + error = hfc_btree_open_ext(hfsmp, &hfsmp->hfc_filevp, 1); + if (!error) { + // and delete it! + error = hfc_btree_delete(hfsmp); + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + } + } + + printf("hfs: %s: %s: creating the hotfile btree\n", hfsmp->vcbVN, __FUNCTION__); + if (hfs_start_transaction(hfsmp) != 0) { + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EINVAL; + } + + /* B-tree creation must be journaled */ + started_tr = 1; + + error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT); + if (error) { +#if HFC_VERBOSE + printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif + goto recording_init_out; + } + + hfs_end_transaction (hfsmp); + started_tr = 0; + /* + * Do a journal flush + flush track cache. We have to ensure that the async I/Os have been issued to the media + * before proceeding. + */ + hfs_flush (hfsmp, HFS_FLUSH_FULL); + + /* now re-start a new transaction */ + if (hfs_start_transaction (hfsmp) != 0) { + lck_mtx_unlock (&hfsmp->hfc_mutex); + return EINVAL; + } + started_tr = 1; + + /* + * Open the Hot File B-tree file for writing. + */ + if (hfsmp->hfc_filevp) + panic("hfs_recording_init: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); + + error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); + if (error) { +#if HFC_VERBOSE + printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif + goto recording_init_out; + } + + /* + * This function performs work similar to namei; we must NOT hold the catalog lock while + * calling it. This will decorate catalog records as being pinning candidates. (no hotfiles work) + */ + hfs_setup_default_cf_hotfiles(hfsmp); + + /* + * now grab the hotfiles b-tree vnode/cnode lock first, as it is not classified as a systemfile. 
+ */ + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + error = EPERM; + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + /* zero it out to avoid pinning later on */ + hfsmp->hfc_filevp = NULL; + goto recording_init_out; + } + + iterator = hfs_mallocz(sizeof(*iterator)); + + key = (HotFileKey*) &iterator->key; + key->keyLength = HFC_KEYLENGTH; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + +#if HFC_VERBOSE + printf("hfs: Evaluating space for \"%s\" metadata zone... (freeblks %d)\n", HFSTOVCB(hfsmp)->vcbVN, + hfsmp->hfs_hotfile_freeblks); +#endif + + /* + * Get ready to scan the Catalog file. We explicitly do NOT grab the catalog lock because + * we're fully single-threaded at the moment (by virtue of being called during mount()), + * and if we have to grow the hotfile btree, then we would need to grab the catalog lock + * and if we take a shared lock here, it would deadlock (see ) + * + * We already started a transaction so we should already be holding the journal lock at this point. + * Note that we have to hold the journal lock / start a txn BEFORE the systemfile locks. + */ + + error = BTScanInitialize(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), 0, 0, 0, + kCatSearchBufferSize, &scanstate); + if (error) { + printf("hfs_recording_init: err %d BTScanInit\n", error); + + /* drop the systemfile locks */ + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + + /* zero it out to avoid pinning */ + hfsmp->hfc_filevp = NULL; + goto recording_init_out; + } + + started_scan = 1; + + filefork = VTOF(hfsmp->hfc_filevp); + + starting_temp = random() % HF_TEMP_RANGE; + + /* + * Visit all the catalog btree leaf records. We have to hold the catalog lock to do this. + * + * NOTE: The B-Tree scanner reads from the media itself. Under normal circumstances it would be + * fine to simply use b-tree routines to read blocks that correspond to b-tree nodes, because the + * block cache is going to ensure you always get the cached copy of a block (even if a journal + * txn has modified one of those blocks). That is NOT true when + * using the scanner. In particular, it will always read whatever is on-disk. So we have to ensure + * that the journal has flushed and that the async I/Os to the metadata files have been issued. + */ + for (;;) { + error = BTScanNextRecord(&scanstate, 0, (void **)&keyp, (void **)&datap, &dataSize); + if (error) { + if (error == btNotFound) + error = 0; + else + printf("hfs_recording_init: err %d BTScanNext\n", error); + break; + } + if ((datap->recordType != kHFSPlusFileRecord) || + (dataSize != sizeof(HFSPlusCatalogFile))) { + continue; + } + filep = (HFSPlusCatalogFile *)datap; + filecount++; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (filep->flags & kHFSDoNotFastDevPinMask) { + uncacheable++; + } + + // + // If the file does not have the FastDevPinnedMask set, we + // can ignore it and just go to the next record. + // + if ((filep->flags & kHFSFastDevPinnedMask) == 0) { + continue; + } + } else if (filep->dataFork.totalBlocks == 0) { + continue; + } + + /* + * On a regular hdd, any file that has blocks inside + * the hot file space is recorded for later eviction. + * + * For now, resource forks are ignored. + * + * We don't do this on CF systems as there is no real + * hotfile area - we just pin/unpin blocks belonging to + * interesting files. 
+ */ + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && !hotextents(hfsmp, &filep->dataFork.extents[0])) { + continue; + } + cnid = filep->fileID; + + /* Skip over journal files and the hotfiles B-Tree file. */ + if (cnid == hfsmp->hfs_jnlfileid + || cnid == hfsmp->hfs_jnlinfoblkid + || cnid == VTOC(hfsmp->hfc_filevp)->c_fileid) { + continue; + } + /* + * XXX - need to skip quota files as well. + */ + + uint32_t temp; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + int rsrc = 0; + + temp = (uint32_t)starting_temp++; + if (filep->flags & kHFSAutoCandidateMask) { + temp += MAX_NORMAL_TEMP; + } + + /* use the data fork by default */ + if (filep->dataFork.totalBlocks == 0) { + /* + * but if empty, switch to rsrc as its likely + * a compressed file + */ + rsrc = 1; + } + + error = hfs_pin_catalog_rec (hfsmp, filep, rsrc); + if (error) + break; + + } else { + temp = HFC_MINIMUM_TEMPERATURE; + } + + /* Insert a hot file entry. */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = temp; + key->fileID = cnid; + key->forkType = 0; + data = 0x3f3f3f3f; + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + error = MacToVFSError(error); + break; + } + + /* Insert the corresponding thread record. */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = HFC_LOOKUPTAG; + key->fileID = cnid; + key->forkType = 0; + data = temp; + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + error = MacToVFSError(error); + break; + } + inserted++; + } // end catalog iteration loop + + save_btree_user_info(hfsmp); + (void) BTFlushPath(filefork); + +recording_init_out: + + /* Unlock first, then pin after releasing everything else */ + if (hfsmp->hfc_filevp) { + hfs_unlock (VTOC(hfsmp->hfc_filevp)); + } + + if (started_scan) { + (void) BTScanTerminate (&scanstate, &data, &data, &data); + } + + if (started_tr) { + hfs_end_transaction(hfsmp); + } + +#if HFC_VERBOSE + printf("hfs: %d files identified out of %d (freeblocks is now: %d)\n", inserted, filecount, hfsmp->hfs_hotfile_freeblks); + if (uncacheable) { + printf("hfs: %d files were marked as uncacheable\n", uncacheable); + } +#endif + + if (iterator) + hfs_free(iterator, sizeof(*iterator)); + + if (hfsmp->hfc_filevp) { + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfs_pin_vnode(hfsmp, hfsmp->hfc_filevp, HFS_PIN_IT, NULL); + } + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + + if (error == 0) + hfsmp->hfc_stage = HFC_IDLE; + + /* Finally, unlock the HFC mutex */ + lck_mtx_unlock (&hfsmp->hfc_mutex); + + return (error); +} + +/* + * Use sync to perform ocassional background work. + */ +int +hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx) +{ + if (hfsmp->hfc_stage) { + struct timeval tv; + + lck_mtx_lock(&hfsmp->hfc_mutex); + + switch (hfsmp->hfc_stage) { + case HFC_IDLE: + (void) hfs_recording_start(hfsmp); + break; + + case HFC_RECORDING: + microtime(&tv); + if (tv.tv_sec > hfsmp->hfc_timeout) + (void) hfs_recording_stop(hfsmp); + break; + + case HFC_EVICTION: + (void) hotfiles_evict(hfsmp, ctx); + break; + + case HFC_ADOPTION: + (void) hotfiles_adopt(hfsmp); + break; + default: + break; + } + + lck_mtx_unlock(&hfsmp->hfc_mutex); + } + return (0); +} + +/* + * Add a hot file to the recording list. 
+ * + * This can happen when a hot file gets reclaimed or at the + * end of the recording period for any active hot file. + * + * NOTE: Since both the data and resource fork can be hot, + * there can be two entries for the same file id. + * + * Note: the cnode is locked on entry. + */ +int +hfs_addhotfile(struct vnode *vp) +{ + hfsmount_t *hfsmp; + int error; + + hfsmp = VTOHFS(vp); + if (hfsmp->hfc_stage != HFC_RECORDING) + return (0); + + lck_mtx_lock(&hfsmp->hfc_mutex); + error = hfs_addhotfile_internal(vp); + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (error); +} + +static int +hf_ignore_process(const char *pname, size_t maxlen) +{ + if ( strncmp(pname, "mds", maxlen) == 0 + || strncmp(pname, "mdworker", maxlen) == 0 + || strncmp(pname, "mds_stores", maxlen) == 0 + || strncmp(pname, "makewhatis", maxlen) == 0) { + return 1; + } + + return 0; + +} + +static int +hfs_addhotfile_internal(struct vnode *vp) +{ + hotfile_data_t *hotdata; + hotfile_entry_t *entry; + hfsmount_t *hfsmp; + cnode_t *cp; + filefork_t *ffp; + u_int32_t temperature; + + hfsmp = VTOHFS(vp); + if (hfsmp->hfc_stage != HFC_RECORDING) + return (0); + + /* + * Only regular files are eligible for hotfiles addition. + * + * Symlinks were previously added to the list and may exist in + * extant hotfiles regions, but no new ones will be added, and no + * symlinks will now be relocated/evicted from the hotfiles region. + */ + if (!vnode_isreg(vp) || vnode_issystem(vp)) { + return (0); + } + + /* Skip resource forks for now. */ + if (VNODE_IS_RSRC(vp)) { + return (0); + } + if ((hotdata = hfsmp->hfc_recdata) == NULL) { + return (0); + } + ffp = VTOF(vp); + cp = VTOC(vp); + + if (cp->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask)) { + // it's already a hotfile or can't be a hotfile... + return 0; + } + + if (vnode_isdir(vp) || vnode_issystem(vp) || (cp->c_flag & (C_DELETED | C_NOEXISTS))) { + return 0; + } + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && vnode_isfastdevicecandidate(vp)) { + // + // On cooperative fusion (CF) systems we have different criteria for whether something + // can be pinned to the ssd. + // + if (cp->c_flag & (C_DELETED|C_NOEXISTS)) { + // + // dead files are definitely not worth caching + // + return 0; + } else if (ffp->ff_blocks == 0 && !(cp->c_bsdflags & UF_COMPRESSED) && !(cp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) { + // + // empty files aren't worth caching but compressed ones might be, as are + // newly created files that live in WorthCaching directories... + // + return 0; + } + + char pname[256]; + pname[0] = '\0'; + proc_selfname(pname, sizeof(pname)); + if (hf_ignore_process(pname, sizeof(pname))) { + // ignore i/o's from certain system daemons + return 0; + } + + temperature = cp->c_fileid; // in memory we just keep it sorted by file-id + } else { + // the normal hard drive based hotfile checks + if ((ffp->ff_bytesread == 0) || + (ffp->ff_blocks == 0) || + (ffp->ff_size == 0) || + (ffp->ff_blocks > hotdata->maxblocks) || + (cp->c_bsdflags & (UF_NODUMP | UF_COMPRESSED)) || + (cp->c_atime < hfsmp->hfc_timebase)) { + return (0); + } + + temperature = ffp->ff_bytesread / ffp->ff_size; + if (temperature < hotdata->threshold) { + return (0); + } + } + + /* + * If there is room or this file is hotter than + * the coldest one then add it to the list. 
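 *
 * [Editor's note, illustrative only; not part of the original change:
 *  on rotational media the temperature computed above is simply bytes
 *  read divided by file size for the current recording period, e.g.
 *
 *      2 MB file, 50 MB read during the period  ->  temperature 25
 *      2 MB file, read end-to-end exactly once  ->  temperature 1 (cold)
 *
 *  while on CF systems the file id itself is used, which keeps the
 *  in-memory tree ordered by file id rather than by heat.]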
+ * + */ + if ((hotdata->activefiles < hfsmp->hfc_maxfiles) || + (hotdata->coldest == NULL) || + (temperature >= hotdata->coldest->temperature)) { + ++hotdata->refcount; + entry = hf_getnewentry(hotdata); + entry->temperature = temperature; + entry->fileid = cp->c_fileid; + // + // if ffp->ff_blocks is zero, it might be compressed so make sure we record + // that there's at least one block. + // + entry->blocks = ffp->ff_blocks ? ffp->ff_blocks : 1; + if (hf_insert(hotdata, entry) == EEXIST) { + // entry is already present, don't need to add it again + entry->right = hotdata->freelist; + hotdata->freelist = entry; + } + --hotdata->refcount; + } + + return (0); +} + +/* + * Remove a hot file from the recording list. + * + * This can happen when a hot file becomes + * an active vnode (active hot files are + * not kept in the recording list until the + * end of the recording period). + * + * Note: the cnode is locked on entry. + */ +int +hfs_removehotfile(struct vnode *vp) +{ + hotfile_data_t *hotdata; + hfsmount_t *hfsmp; + cnode_t *cp; + filefork_t *ffp; + u_int32_t temperature; + + hfsmp = VTOHFS(vp); + if (hfsmp->hfc_stage != HFC_RECORDING) + return (0); + + if ((!vnode_isreg(vp)) || vnode_issystem(vp)) { + return (0); + } + + ffp = VTOF(vp); + cp = VTOC(vp); + + if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) || + (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) { + return (0); + } + + lck_mtx_lock(&hfsmp->hfc_mutex); + if (hfsmp->hfc_stage != HFC_RECORDING) + goto out; + if ((hotdata = hfsmp->hfc_recdata) == NULL) + goto out; + + temperature = ffp->ff_bytesread / ffp->ff_size; + if (temperature < hotdata->threshold) + goto out; + + if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) { + ++hotdata->refcount; + hf_delete(hotdata, VTOC(vp)->c_fileid, temperature); + --hotdata->refcount; + } +out: + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (0); +} + +int +hfs_hotfile_deleted(__unused struct vnode *vp) +{ +#if 1 + return 0; +#else + // + // XXXdbg - this code, while it would work, would introduce a huge inefficiency + // to deleting files as the way it's written would require us to open + // the hotfile btree on every open, delete two records in it and then + // close the hotfile btree (which involves more writes). + // + // We actually can be lazy about deleting hotfile records for files + // that get deleted. When it's time to evict things, if we encounter + // a record that references a dead file (i.e. a fileid which no + // longer exists), the eviction code will remove the records. Likewise + // the code that scans the HotFile B-Tree at boot time to re-pin files + // will remove dead records. 
+ // + + hotfile_data_t *hotdata; + hfsmount_t *hfsmp; + cnode_t *cp; + filefork_t *filefork; + u_int32_t temperature; + BTreeIterator * iterator = NULL; + FSBufferDescriptor record; + HotFileKey *key; + u_int32_t data; + int error=0; + + cp = VTOC(vp); + if (cp == NULL || !(cp->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { + return 0; + } + + hfsmp = VTOHFS(vp); + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + return 0; + } + + if (hfc_btree_open(hfsmp, &hfsmp->hfc_filevp) != 0 || hfsmp->hfc_filevp == NULL) { + // either there is no hotfile info or it's damaged + return EINVAL; + } + + filefork = VTOF(hfsmp->hfc_filevp); + if (filefork == NULL) { + return 0; + } + + iterator = hfs_mallocz(sizeof(*iterator)); + + key = (HotFileKey*) &iterator->key; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + + key->keyLength = HFC_KEYLENGTH; + key->temperature = HFC_LOOKUPTAG; + key->fileID = cp->c_fileid; + key->forkType = 0; + + lck_mtx_lock(&hfsmp->hfc_mutex); + (void) BTInvalidateHint(iterator); + if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) == 0) { + temperature = key->temperature; + hfc_btree_delete_record(hfsmp, iterator, key); + } else { + //printf("hfs: hotfile_deleted: did not find fileid %d\n", cp->c_fileid); + error = ENOENT; + } + + if ((hotdata = hfsmp->hfc_recdata) != NULL) { + // just in case, also make sure it's removed from the in-memory list as well + ++hotdata->refcount; + hf_delete(hotdata, cp->c_fileid, cp->c_fileid); + --hotdata->refcount; + } + + lck_mtx_unlock(&hfsmp->hfc_mutex); + hfs_free(iterator, sizeof(*iterator)); + + hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + + return error; +#endif +} + +int +hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks) +{ + hfsmount_t *hfsmp; + + if (vp == NULL) { + return 0; + } + + hfsmp = VTOHFS(vp); + + if (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) || num_blocks == 0 || vp == NULL) { + return 0; + } + + // + // if file is not HotFileCached or it has the CanNotHotFile cache + // bit set then there is nothing to do + // + if (!(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) || (VTOC(vp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) { + // it's not a hot file or can't be one so don't bother tracking + return 0; + } + + OSAddAtomic(num_blocks, &hfsmp->hfs_hotfile_blk_adjust); + + return (0); +} + +// +// Assumes hfsmp->hfc_mutex is LOCKED +// +static int +hfs_hotfile_cur_freeblks(hfsmount_t *hfsmp) +{ + if (hfsmp->hfc_stage < HFC_IDLE) { + return 0; + } + + int cur_blk_adjust = hfsmp->hfs_hotfile_blk_adjust; // snap a copy of this value + + if (cur_blk_adjust) { + OSAddAtomic(-cur_blk_adjust, &hfsmp->hfs_hotfile_blk_adjust); + hfsmp->hfs_hotfile_freeblks += cur_blk_adjust; + } + + return hfsmp->hfs_hotfile_freeblks; +} + + +/* + *======================================================================== + * HOT FILE MAINTENANCE ROUTINES + *======================================================================== + */ + +static int +hotfiles_collect_callback(struct vnode *vp, __unused void *cargs) +{ + if ((vnode_isreg(vp)) && !vnode_issystem(vp)) + (void) hfs_addhotfile_internal(vp); + + return (VNODE_RETURNED); +} + +/* + * Add all active hot files to the recording list. 
+ */ +static int +hotfiles_collect(struct hfsmount *hfsmp) +{ + struct mount *mp = HFSTOVFS(hfsmp); + + if (vfs_busy(mp, LK_NOWAIT)) + return (0); + + /* + * hotfiles_collect_callback will be called for each vnode + * hung off of this mount point + * the vnode will be + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mp, 0, hotfiles_collect_callback, (void *)NULL); + + vfs_unbusy(mp); + + return (0); +} + + +/* + * Update the data of a btree record + * This is called from within BTUpdateRecord. + */ +static int +update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state) +{ + if (key->temperature == HFC_LOOKUPTAG) + *data = *state; + return (0); +} + +/* + * Identify files already in hot area. + */ +static int +hotfiles_refine(struct hfsmount *hfsmp) +{ + BTreeIterator * iterator = NULL; + struct mount *mp; + filefork_t * filefork; + hotfilelist_t *listp; + FSBufferDescriptor record; + HotFileKey * key; + u_int32_t data; + int i; + int error = 0; + + if ((listp = hfsmp->hfc_filelist) == NULL) + return (0); + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // on ssd's we don't refine the temperature since the + // replacement algorithm is simply random + return 0; + } + + mp = HFSTOVFS(hfsmp); + + iterator = hfs_mallocz(sizeof(*iterator)); + + key = (HotFileKey*) &iterator->key; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + error = EPERM; + goto out1; + } + filefork = VTOF(hfsmp->hfc_filevp); + + for (i = 0; i < listp->hfl_count; ++i) { + /* + * Check if entry (thread) is already in hot area. + */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = HFC_LOOKUPTAG; + key->fileID = listp->hfl_hotfile[i].hf_fileid; + key->forkType = 0; + (void) BTInvalidateHint(iterator); + if (BTSearchRecord(filefork, iterator, &record, NULL, iterator) != 0) { + continue; /* not in hot area, so skip */ + } + + /* + * Update thread entry with latest temperature. + */ + error = BTUpdateRecord(filefork, iterator, + (IterateCallBackProcPtr)update_callback, + &listp->hfl_hotfile[i].hf_temperature); + if (error) { + printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID); + error = MacToVFSError(error); + // break; + } + /* + * Re-key entry with latest temperature. + */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = data; + key->fileID = listp->hfl_hotfile[i].hf_fileid; + key->forkType = 0; + /* Pick up record data. */ + (void) BTInvalidateHint(iterator); + (void) BTSearchRecord(filefork, iterator, &record, NULL, iterator); + error = BTDeleteRecord(filefork, iterator); + if (error) { + printf("hfs: hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID); + error = MacToVFSError(error); + break; + } + key->keyLength = HFC_KEYLENGTH; + key->temperature = listp->hfl_hotfile[i].hf_temperature; + key->fileID = listp->hfl_hotfile[i].hf_fileid; + key->forkType = 0; + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + printf("hfs: hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID); + error = MacToVFSError(error); + break; + } + /* + * Invalidate this entry in the list. 
+ */ + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; + + } /* end for */ + + (void) BTFlushPath(filefork); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + +out1: + hfs_end_transaction(hfsmp); +out: + if (iterator) + hfs_free(iterator, sizeof(*iterator)); + return (error); +} + +/* + * Move new hot files into hot area. + * + * Requires that the hfc_mutex be held. + */ +static int +hotfiles_adopt(struct hfsmount *hfsmp) +{ + BTreeIterator * iterator = NULL; + struct vnode *vp; + filefork_t * filefork; + hotfilelist_t *listp; + FSBufferDescriptor record; + HotFileKey * key; + u_int32_t data; + enum hfc_stage stage; + int fileblocks; + int blksmoved; + int i; + int last; + int error = 0; + int startedtrans = 0; + // + // all files in a given adoption phase have a temperature + // that starts at a random value and then increases linearly. + // the idea is that during eviction, files that were adopted + // together will be evicted together + // + long starting_temp = random() % HF_TEMP_RANGE; + long temp_adjust = 0; + + if ((listp = hfsmp->hfc_filelist) == NULL) + return (0); + + if (hfsmp->hfc_stage != HFC_ADOPTION) { + return (EBUSY); + } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + return (EPERM); + } + + iterator = hfs_mallocz(sizeof(*iterator)); + +#if HFC_VERBOSE + printf("hfs:%s: hotfiles_adopt: (hfl_next: %d, hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + listp->hfl_next, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); +#endif + + stage = hfsmp->hfc_stage; + hfsmp->hfc_stage = HFC_BUSY; + + blksmoved = 0; + last = listp->hfl_next + HFC_FILESPERSYNC; + if (last > listp->hfl_count) + last = listp->hfl_count; + + key = (HotFileKey*) &iterator->key; + key->keyLength = HFC_KEYLENGTH; + + record.bufferAddress = &data; + record.itemSize = sizeof(u_int32_t); + record.itemCount = 1; + + filefork = VTOF(hfsmp->hfc_filevp); + + for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) { + /* + * Skip entries that aren't going to work. + */ + if (listp->hfl_hotfile[i].hf_temperature == 0) { + //printf("hfs: zero temp on file-id %d\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; + } + if (listp->hfl_hotfile[i].hf_fileid == VTOC(hfsmp->hfc_filevp)->c_fileid) { + //printf("hfs: cannot adopt the hotfile b-tree itself! (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; + } + if (listp->hfl_hotfile[i].hf_fileid < kHFSFirstUserCatalogNodeID) { + //printf("hfs: cannot adopt system files (file-id %d)\n", listp->hfl_hotfile[i].hf_fileid); + listp->hfl_next++; + continue; + } + + /* + * Acquire a vnode for this file. + */ + error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0); + if (error) { + //printf("failed to get fileid %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error); + if (error == ENOENT) { + error = 0; + listp->hfl_next++; + continue; /* stale entry, go to next */ + } + break; + } + + //printf("hfs: examining hotfile entry w/fileid %d, temp %d, blocks %d (HotFileCached: %s)\n", + // listp->hfl_hotfile[i].hf_fileid, listp->hfl_hotfile[i].hf_temperature, + // listp->hfl_hotfile[i].hf_blocks, + // (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask) ? "YES" : "NO"); + + if (!vnode_isreg(vp)) { + /* Symlinks are ineligible for adoption into the hotfile zone. 
*/ + //printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_next++; + continue; /* stale entry, go to next */ + } + if ( (VTOC(vp)->c_flag & (C_DELETED | C_NOEXISTS)) + || (!(hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) + || (VTOC(vp)->c_attr.ca_recflags & (kHFSFastDevPinnedMask|kHFSDoNotFastDevPinMask))) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_next++; + listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; + continue; /* stale entry, go to next */ + } + + fileblocks = VTOF(vp)->ff_blocks; + + // + // for CF, if the file is empty (and not compressed) or it is too large, + // do not try to pin it. (note: if fileblocks == 0 but the file is marked + // as compressed, we may still be able to cache it). + // + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && + ((fileblocks == 0 && !(VTOC(vp)->c_bsdflags & UF_COMPRESSED)) || + (unsigned int)fileblocks > (HFC_MAXIMUM_FILESIZE / (uint64_t)HFSTOVCB(hfsmp)->blockSize))) { + // don't try to cache something too large or that's zero-bytes + + vnode_clearfastdevicecandidate(vp); // turn off the fast-dev-candidate flag so we don't keep trying to cache it. + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_next++; + listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; + continue; /* entry is too big, just carry on with the next guy */ + } + + // + // If a file is not an autocandidate (i.e. it's a user-tagged file desirous of + // being hotfile cached) but it is already bigger than 4 megs, don't bother + // hotfile caching it. Note that if a user tagged file starts small, gets + // adopted and then grows over time we will allow it to grow bigger than 4 megs + // which is intentional for things like the Mail or Photos database files which + // grow slowly over time and benefit from being on the FastDevice. + // + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && + !(VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) && + (VTOC(vp)->c_attr.ca_recflags & kHFSFastDevCandidateMask) && + (unsigned int)fileblocks > ((4*1024*1024) / (uint64_t)HFSTOVCB(hfsmp)->blockSize)) { + + vnode_clearfastdevicecandidate(vp); // turn off the fast-dev-candidate flag so we don't keep trying to cache it. + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; + listp->hfl_next++; + listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; + continue; /* entry is too big, just carry on with the next guy */ + } + + if (fileblocks > hfs_hotfile_cur_freeblks(hfsmp)) { + // + // No room for this file. Although eviction should have made space + // it's best that we check here as well since writes to existing + // hotfiles may have eaten up space since we performed eviction + // + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_next++; + listp->hfl_totalblocks -= fileblocks; + continue; /* entry too big, go to next */ + } + + if ((blksmoved > 0) && + (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { + // + // we've done enough work, let's be nice to the system and + // stop until the next iteration + // + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; /* adopt this entry the next time around */ + } + + // + // The size of data for a hot file record is 4 bytes. The data + // stored in hot file record is not really meaningful. 
However + // to aid debugging, we store first four bytes of the file name + // or the ASCII text "????" + // + if (VTOC(vp)->c_desc.cd_nameptr && (VTOC(vp)->c_desc.cd_namelen > 0)) { + size_t max_len; + + max_len = sizeof(u_int32_t); + if (max_len > (unsigned)VTOC(vp)->c_desc.cd_namelen) + max_len = VTOC(vp)->c_desc.cd_namelen; + + memcpy(&data, VTOC(vp)->c_desc.cd_nameptr, max_len); + } else + data = 0x3f3f3f3f; + + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // For CF we pin the blocks belonging to the file + // to the "fast" (aka ssd) media + // + uint32_t pinned_blocks; + + if (vnode_isautocandidate(vp)) { + VTOC(vp)->c_attr.ca_recflags |= kHFSAutoCandidateMask; + } + if (VTOC(vp)->c_attr.ca_recflags & kHFSAutoCandidateMask) { + // + // this moves auto-cached files to the higher tier + // of "temperatures" which means they are less likely + // to get evicted (user selected hotfiles will get + // evicted first in the theory that they change more + // frequently compared to system files) + // + temp_adjust = MAX_NORMAL_TEMP; + } else { + temp_adjust = 0; + } + + hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this + hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, &pinned_blocks); + + fileblocks = pinned_blocks; + + // go back to an exclusive lock since we're going to modify the cnode again + hfs_unlock(VTOC(vp)); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } else { + // + // Old style hotfiles moves the data to the center (aka "hot") + // region of the disk + // + error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc()); + } + + if (!error) { + VTOC(vp)->c_attr.ca_recflags |= kHFSFastDevPinnedMask; + VTOC(vp)->c_flag |= C_MODIFIED; + } else if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && error == EALREADY) { + // + // If hfs_pin_vnode() returned EALREADY then this file is not + // ever able to be hotfile cached the normal way. This can + // happen with compressed files which have their data stored + // in an extended attribute. We flag them so that we won't + // bother to try and hotfile cache them again the next time + // they're read. + // + VTOC(vp)->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask; + VTOC(vp)->c_flag |= C_MODIFIED; + } + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + if (error) { +#if HFC_VERBOSE + if (error != EALREADY) { + printf("hfs: hotfiles_adopt: could not relocate file %d (err %d)\n", listp->hfl_hotfile[i].hf_fileid, error); + } +#endif + + if (last < listp->hfl_count) { + last++; + } + /* Move on to next item. */ + listp->hfl_next++; + continue; + } + /* Keep hot file free space current. */ + hfsmp->hfs_hotfile_freeblks -= fileblocks; + listp->hfl_totalblocks -= fileblocks; + + /* Insert hot file entry */ + key->keyLength = HFC_KEYLENGTH; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // The "temperature" for a CF hotfile is simply a random + // number that we sequentially increment for each file in + // the set of files we're currently adopting. This has the + // nice property that all of the files we pin to the ssd + // in the current phase will sort together in the hotfile + // btree. When eviction time comes we will evict them + // together as well. This gives the eviction phase temporal + // locality - things written together get evicted together + // which is what ssd's like. 
+ // + listp->hfl_hotfile[i].hf_temperature = (uint32_t)temp_adjust + starting_temp++; + } + + key->temperature = listp->hfl_hotfile[i].hf_temperature; + key->fileID = listp->hfl_hotfile[i].hf_fileid; + key->forkType = 0; + + /* Start a new transaction before calling BTree code. */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + break; + } + startedtrans = 1; + + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + int orig_error = error; + error = MacToVFSError(error); + printf("hfs: hotfiles_adopt:1: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID); + stage = HFC_IDLE; + break; + } + + /* Insert thread record */ + key->keyLength = HFC_KEYLENGTH; + key->temperature = HFC_LOOKUPTAG; + key->fileID = listp->hfl_hotfile[i].hf_fileid; + key->forkType = 0; + data = listp->hfl_hotfile[i].hf_temperature; + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); + if (error) { + int orig_error = error; + error = MacToVFSError(error); + printf("hfs: hotfiles_adopt:2: BTInsertRecord failed %d/%d (fileid %d)\n", error, orig_error, key->fileID); + stage = HFC_IDLE; + break; + } else { + (void) BTFlushPath(filefork); + blksmoved += fileblocks; + } + + listp->hfl_next++; + if (listp->hfl_next >= listp->hfl_count) { + break; + } + + /* Transaction complete. */ + if (startedtrans) { + hfs_end_transaction(hfsmp); + startedtrans = 0; + } + + if (hfs_hotfile_cur_freeblks(hfsmp) <= 0) { +#if HFC_VERBOSE + printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks); +#endif + break; + } + } /* end for */ + +#if HFC_VERBOSE + printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d files left)\n", listp->hfl_next, blksmoved, listp->hfl_count - i); +#endif + if (!startedtrans) { + // start a txn so we'll save the btree summary info + if (hfs_start_transaction(hfsmp) == 0) { + startedtrans = 1; + } + } + + /* Finish any outstanding transactions. */ + if (startedtrans) { + save_btree_user_info(hfsmp); + + (void) BTFlushPath(filefork); + hfs_end_transaction(hfsmp); + startedtrans = 0; + } + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) { +#if HFC_VERBOSE + printf("hfs: hotfiles_adopt: all done relocating %d files\n", listp->hfl_count); + printf("hfs: hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); +#endif + stage = HFC_IDLE; + } + hfs_free(iterator, sizeof(*iterator)); + + if (stage != HFC_ADOPTION && hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + } + hfsmp->hfc_stage = stage; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); +} + +/* + * Reclaim space by evicting the coldest files. + * + * Requires that the hfc_mutex be held. 
+ */ +static int +hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) +{ + BTreeIterator * iterator = NULL; + struct vnode *vp; + HotFileKey * key; + filefork_t * filefork; + hotfilelist_t *listp; + enum hfc_stage stage; + u_int32_t savedtemp; + int blksmoved; + int filesmoved; + int fileblocks; + int error = 0; + int startedtrans = 0; + int bt_op; + + if (hfsmp->hfc_stage != HFC_EVICTION) { + return (EBUSY); + } + + if ((listp = hfsmp->hfc_filelist) == NULL) + return (0); + + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + return (EPERM); + } + +#if HFC_VERBOSE + printf("hfs:%s: hotfiles_evict (hotfile start/end block: %d - %d; max/free: %d/%d; maxfiles: %d)\n", + hfsmp->vcbVN, + hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end, + hfsmp->hfs_hotfile_maxblks, hfsmp->hfs_hotfile_freeblks, hfsmp->hfc_maxfiles); +#endif + + iterator = hfs_mallocz(sizeof(*iterator)); + + stage = hfsmp->hfc_stage; + hfsmp->hfc_stage = HFC_BUSY; + + filesmoved = blksmoved = 0; + bt_op = kBTreeFirstRecord; + + key = (HotFileKey*) &iterator->key; + + filefork = VTOF(hfsmp->hfc_filevp); + +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: reclaim blks %d\n", listp->hfl_reclaimblks); +#endif + + while (listp->hfl_reclaimblks > 0 && + blksmoved < HFC_BLKSPERSYNC && + filesmoved < HFC_FILESPERSYNC) { + + /* + * Obtain the first record (ie the coldest one). + */ + if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) { +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: no more records\n"); +#endif + error = 0; + stage = HFC_ADOPTION; + break; + } + if (key->keyLength != HFC_KEYLENGTH) { + printf("hfs: hotfiles_evict: invalid key length %d\n", key->keyLength); + error = EFTYPE; + break; + } + if (key->temperature == HFC_LOOKUPTAG) { +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: ran into thread records\n"); +#endif + error = 0; + stage = HFC_ADOPTION; + break; + } + + // Jump straight to delete for some files... + if (key->fileID == VTOC(hfsmp->hfc_filevp)->c_fileid + || key->fileID == hfsmp->hfs_jnlfileid + || key->fileID == hfsmp->hfs_jnlinfoblkid + || key->fileID < kHFSFirstUserCatalogNodeID) { + goto delete; + } + + /* + * Aquire the vnode for this file. + */ + error = hfs_vget(hfsmp, key->fileID, &vp, 0, 0); + if (error) { + if (error == ENOENT) { + goto delete; /* stale entry, go to next */ + } else { + printf("hfs: hotfiles_evict: err %d getting file %d\n", + error, key->fileID); + } + break; + } + + /* + * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck + * here. We do not want to move them. + */ + if (!vnode_isreg(vp)) { + //printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + goto delete; /* invalid entry, go to next */ + } + + fileblocks = VTOF(vp)->ff_blocks; + if ((blksmoved > 0) && + (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + /* + * Make sure file is in the hot area. + */ + if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0]) && !(VTOC(vp)->c_attr.ca_recflags & kHFSFastDevPinnedMask)) { +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID); +#endif + hfs_unlock(VTOC(vp)); + vnode_put(vp); + goto delete; /* stale entry, go to next */ + } + + /* + * Relocate file out of hot area. On cooperative fusion (CF) that just + * means un-pinning the data from the ssd. For traditional hotfiles that means moving + * the file data out of the hot region of the disk. 
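 *
 * [Editor's note, illustrative only; not part of the original change:
 *  the accounting further below credits whatever was actually released,
 *  e.g. if hfl_reclaimblks is 300 and a 512-block file is evicted,
 *  hfs_hotfile_freeblks grows by 512, hfl_reclaimblks clamps to 0, and
 *  the enclosing while-loop then ends this eviction pass so the stage
 *  can advance to HFC_ADOPTION.]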
+ */ + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + uint32_t pinned_blocks; + + hfs_unlock(VTOC(vp)); // don't need an exclusive lock for this + hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + error = hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &pinned_blocks); + fileblocks = pinned_blocks; + + if (!error) { + // go back to an exclusive lock since we're going to modify the cnode again + hfs_unlock(VTOC(vp)); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + } else { + error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx)); + } + if (error) { +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID); +#endif + hfs_unlock(VTOC(vp)); + vnode_put(vp); + bt_op = kBTreeNextRecord; + goto next; /* go to next */ + } else { + VTOC(vp)->c_attr.ca_recflags &= ~kHFSFastDevPinnedMask; + VTOC(vp)->c_flag |= C_MODIFIED; + } + + // + // We do not believe that this call to hfs_fsync() is + // necessary and it causes a journal transaction + // deadlock so we are removing it. + // + // (void) hfs_fsync(vp, MNT_WAIT, 0, p); + + hfs_unlock(VTOC(vp)); + vnode_put(vp); + + hfsmp->hfs_hotfile_freeblks += fileblocks; + listp->hfl_reclaimblks -= fileblocks; + if (listp->hfl_reclaimblks < 0) + listp->hfl_reclaimblks = 0; + blksmoved += fileblocks; + filesmoved++; +delete: + /* Start a new transaction before calling BTree code. */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + break; + } + startedtrans = 1; + + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + break; + } + savedtemp = key->temperature; + key->temperature = HFC_LOOKUPTAG; + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + break; + } + key->temperature = savedtemp; +next: + (void) BTFlushPath(filefork); + + /* Transaction complete. */ + if (startedtrans) { + hfs_end_transaction(hfsmp); + startedtrans = 0; + } + + } /* end while */ + +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks); +#endif + /* Finish any outstanding transactions. */ + if (startedtrans) { + save_btree_user_info(hfsmp); + + (void) BTFlushPath(filefork); + hfs_end_transaction(hfsmp); + startedtrans = 0; + } + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + + /* + * Move to next stage when finished. + */ + if (listp->hfl_reclaimblks <= 0) { + stage = HFC_ADOPTION; +#if HFC_VERBOSE + printf("hfs: hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); +#endif + } + hfs_free(iterator, sizeof(*iterator)); + hfsmp->hfc_stage = stage; + wakeup((caddr_t)&hfsmp->hfc_stage); + return (error); +} + +/* + * Age the existing records in the hot files b-tree. 
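 *
 * [Editor's note, illustrative only; not part of the original change:
 *  the loop below rewrites each key with newtemp = MAX(temperature >> 1, 4)
 *  and only visits the first half of the leaf records, so one pass over
 *  temperatures 128, 37, 6, 4 would leave them at 64, 18, 4, 4; the
 *  matching thread record is then updated with the same aged value.]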
+ */ +static int +hotfiles_age(struct hfsmount *hfsmp) +{ + BTreeInfoRec btinfo; + BTreeIterator * iterator = NULL; + BTreeIterator * prev_iterator; + FSBufferDescriptor record; + FSBufferDescriptor prev_record; + HotFileKey * key; + HotFileKey * prev_key; + filefork_t * filefork; + u_int32_t data; + u_int32_t prev_data; + u_int32_t newtemp; + int error; + int i; + int numrecs; + int aged = 0; + u_int16_t reclen; + + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // hotfiles don't age on CF + // + return 0; + } + + iterator = hfs_mallocz(2 * sizeof(*iterator)); + + key = (HotFileKey*) &iterator->key; + + prev_iterator = &iterator[1]; + prev_key = (HotFileKey*) &prev_iterator->key; + + record.bufferAddress = &data; + record.itemSize = sizeof(data); + record.itemCount = 1; + prev_record.bufferAddress = &prev_data; + prev_record.itemSize = sizeof(prev_data); + prev_record.itemCount = 1; + + /* + * Capture b-tree changes inside a transaction + */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out2; + } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + error = EPERM; + goto out1; + } + filefork = VTOF(hfsmp->hfc_filevp); + + error = BTGetInformation(filefork, 0, &btinfo); + if (error) { + error = MacToVFSError(error); + goto out; + } + if (btinfo.numRecords < 2) { + error = 0; + goto out; + } + + /* Only want 1st half of leaf records */ + numrecs = (btinfo.numRecords /= 2) - 1; + + error = BTIterateRecord(filefork, kBTreeFirstRecord, iterator, &record, &reclen); + if (error) { + printf("hfs_agehotfiles: BTIterateRecord: %d\n", error); + error = MacToVFSError(error); + goto out; + } + bcopy(iterator, prev_iterator, sizeof(BTreeIterator)); + prev_data = data; + + for (i = 0; i < numrecs; ++i) { + error = BTIterateRecord(filefork, kBTreeNextRecord, iterator, &record, &reclen); + if (error == 0) { + if (key->temperature < prev_key->temperature) { + printf("hfs_agehotfiles: out of order keys!\n"); + error = EFTYPE; + break; + } + if (reclen != sizeof(data)) { + printf("hfs_agehotfiles: invalid record length %d\n", reclen); + error = EFTYPE; + break; + } + if (key->keyLength != HFC_KEYLENGTH) { + printf("hfs_agehotfiles: invalid key length %d\n", key->keyLength); + error = EFTYPE; + break; + } + } else if ((error == fsBTEndOfIterationErr || error == fsBTRecordNotFoundErr) && + (i == (numrecs - 1))) { + error = 0; + } else if (error) { + printf("hfs_agehotfiles: %d of %d BTIterateRecord: %d\n", i, numrecs, error); + error = MacToVFSError(error); + break; + } + if (prev_key->temperature == HFC_LOOKUPTAG) { +#if HFC_VERBOSE + printf("hfs_agehotfiles: ran into thread record\n"); +#endif + error = 0; + break; + } + error = BTDeleteRecord(filefork, prev_iterator); + if (error) { + printf("hfs_agehotfiles: BTDeleteRecord failed %d (file %d)\n", error, prev_key->fileID); + error = MacToVFSError(error); + break; + } + + /* Age by halving the temperature (floor = 4) */ + newtemp = MAX(prev_key->temperature >> 1, 4); + prev_key->temperature = newtemp; + + error = BTInsertRecord(filefork, prev_iterator, &prev_record, prev_record.itemSize); + if (error) { + printf("hfs_agehotfiles: BTInsertRecord failed %d (file %d)\n", error, prev_key->fileID); + error = MacToVFSError(error); + break; + } + ++aged; + /* + * Update thread entry with latest temperature. 
+ */ + prev_key->temperature = HFC_LOOKUPTAG; + error = BTUpdateRecord(filefork, prev_iterator, + (IterateCallBackProcPtr)update_callback, + &newtemp); + if (error) { + printf("hfs_agehotfiles: %d of %d BTUpdateRecord failed %d (file %d, %d)\n", + i, numrecs, error, prev_key->fileID, newtemp); + error = MacToVFSError(error); + // break; + } + + bcopy(iterator, prev_iterator, sizeof(BTreeIterator)); + prev_data = data; + + } /* end for */ + +#if HFC_VERBOSE + if (error == 0) + printf("hfs_agehotfiles: aged %d records out of %d\n", aged, btinfo.numRecords); +#endif + (void) BTFlushPath(filefork); +out: + hfs_unlock(VTOC(hfsmp->hfc_filevp)); +out1: + hfs_end_transaction(hfsmp); +out2: + if (iterator) + hfs_free(iterator, 2 * sizeof(*iterator)); + return (error); +} + +/* + * Return true if any blocks (or all blocks if all is true) + * are contained in the hot file region. + */ +static int +hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents) +{ + u_int32_t b1, b2; + int i; + int inside = 0; + + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + b1 = extents[i].startBlock; + if (b1 == 0) + break; + b2 = b1 + extents[i].blockCount - 1; + if ((b1 >= hfsmp->hfs_hotfile_start && + b2 <= hfsmp->hfs_hotfile_end) || + (b1 < hfsmp->hfs_hotfile_end && + b2 > hfsmp->hfs_hotfile_end)) { + inside = 1; + break; + } + } + return (inside); +} + + +/* + *======================================================================== + * HOT FILE B-TREE ROUTINES + *======================================================================== + */ + +/* + * Open the hot files b-tree for writing. + * + * On successful exit the vnode has a reference but not an iocount. + */ +static int +hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) +{ + return hfc_btree_open_ext(hfsmp, vpp, 0); +} + +static int +hfc_btree_open_ext(struct hfsmount *hfsmp, struct vnode **vpp, int ignore_btree_errs) +{ + proc_t p; + struct vnode *vp; + struct cat_desc cdesc; + struct cat_attr cattr; + struct cat_fork cfork; + static char filename[] = HFC_FILENAME; + int error; + int retry = 0; + int lockflags; + int newvnode_flags = 0; + + *vpp = NULL; + p = current_proc(); + + bzero(&cdesc, sizeof(cdesc)); + cdesc.cd_parentcnid = kRootDirID; + cdesc.cd_nameptr = (const u_int8_t *)filename; + cdesc.cd_namelen = strlen(filename); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup(hfsmp, &cdesc, 0, 0, &cdesc, &cattr, &cfork, NULL); + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + printf("hfs: hfc_btree_open: cat_lookup error %d\n", error); + return (error); + } +again: + cdesc.cd_flags |= CD_ISMETA; + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr, + &cfork, &vp, &newvnode_flags); + if (error) { + printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error); + cat_releasedesc(&cdesc); + return (error); + } + if (!vnode_issystem(vp)) { +#if HFC_VERBOSE + printf("hfs: hfc_btree_open: file has UBC, try again\n"); +#endif + hfs_unlock(VTOC(vp)); + vnode_recycle(vp); + vnode_put(vp); + if (retry++ == 0) + goto again; + else + return (EBUSY); + } + + /* Open the B-tree file for writing... 
*/ + error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys); + if (error) { + if (!ignore_btree_errs) { + printf("hfs: hfc_btree_open: BTOpenPath error %d; filesize %lld\n", error, VTOF(vp)->ff_size); + error = MacToVFSError(error); + } else { + error = 0; + } + } + + hfs_unlock(VTOC(vp)); + if (error == 0) { + *vpp = vp; + vnode_ref(vp); /* keep a reference while its open */ + } + vnode_put(vp); + + if (!vnode_issystem(vp)) + panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp); + + HotFilesInfo hotfileinfo; + + if (error == 0 && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + if ((BTGetUserData(VTOF(vp), &hotfileinfo, sizeof(hotfileinfo)) == 0) && (SWAP_BE32 (hotfileinfo.magic) == HFC_MAGIC)) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks - SWAP_BE32 (hotfileinfo.usedblocks); + } + + hfs_hotfile_cur_freeblks(hfsmp); // factors in any adjustments that happened at run-time + } + } + + return (error); +} + +/* + * Close the hot files b-tree. + * + * On entry the vnode has a reference. + */ +static int +hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) +{ + proc_t p = current_proc(); + int error = 0; + + + if (hfsmp->jnl) { + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); + } + + if (vnode_get(vp) == 0) { + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + (void) hfs_fsync(vp, MNT_WAIT, 0, p); + error = BTClosePath(VTOF(vp)); + hfs_unlock(VTOC(vp)); + } + vnode_rele(vp); + vnode_recycle(vp); + vnode_put(vp); + } + + return (error); +} + +// +// Assumes that hfsmp->hfc_filevp points to the hotfile btree vnode +// (i.e. you called hfc_btree_open() ahead of time) +// +static int +hfc_btree_delete_record(struct hfsmount *hfsmp, BTreeIterator *iterator, HotFileKey *key) +{ + int error; + filefork_t *filefork=VTOF(hfsmp->hfc_filevp); + + /* Start a new transaction before calling BTree code. */ + if (hfs_start_transaction(hfsmp) != 0) { + return EINVAL; + } + + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + printf("hfs: failed to delete record for file-id %d : err %d\n", key->fileID, error); + goto out; + } + + int savedtemp; + savedtemp = key->temperature; + key->temperature = HFC_LOOKUPTAG; + error = BTDeleteRecord(filefork, iterator); + if (error) { + error = MacToVFSError(error); + printf("hfs:2: failed to delete record for file-id %d : err %d\n", key->fileID, error); + } + key->temperature = savedtemp; + + (void) BTFlushPath(filefork); + +out: + /* Transaction complete. */ + hfs_end_transaction(hfsmp); + + return error; +} + +// +// You have to have already opened the hotfile btree so +// that hfsmp->hfc_filevp is filled in. 
+// +static int +hfc_btree_delete(struct hfsmount *hfsmp) +{ + struct vnode *dvp = NULL; + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + static char filename[] = HFC_FILENAME; + int error; + + error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx); + if (error) { + return (error); + } + + struct componentname cname = { + .cn_nameiop = DELETE, + .cn_flags = ISLASTCN, + .cn_pnbuf = filename, + .cn_pnlen = sizeof(filename), + .cn_nameptr = filename, + .cn_namelen = strlen(filename), + }; + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR); + VATTR_SET(&va, va_uid, 0); + VATTR_SET(&va, va_gid, 0); + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + + struct vnop_remove_args ap = { + .a_dvp = dvp, + .a_vp = hfsmp->hfc_filevp, + .a_cnp = &cname, + }; + + error = hfs_vnop_remove(&ap); + if (error) { + printf("hfs: error %d removing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + } + + hfs_end_transaction(hfsmp); + +out: + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + return 0; +} + + + + +/* + * Create a hot files btree file. + * + */ +static int +hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int entries) +{ + struct vnode *dvp = NULL; + struct vnode *vp = NULL; + struct cnode *cp = NULL; + vfs_context_t ctx = vfs_context_current(); + struct vnode_attr va; + static char filename[] = HFC_FILENAME; + int error; + + if (hfsmp->hfc_filevp) + panic("hfs: hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); + + error = hfs_vfs_root(HFSTOVFS(hfsmp), &dvp, ctx); + if (error) { + return (error); + } + + struct componentname cname = { + .cn_nameiop = CREATE, + .cn_flags = ISLASTCN, + .cn_pnbuf = filename, + .cn_pnlen = sizeof(filename), + .cn_nameptr = filename, + .cn_namelen = strlen(filename) + }; + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR); + VATTR_SET(&va, va_uid, 0); + VATTR_SET(&va, va_gid, 0); + + if (hfs_start_transaction(hfsmp) != 0) { + vnode_put(dvp); + return EINVAL; + } + + /* call ourselves directly, ignore the higher-level VFS file creation code */ + + struct vnop_create_args ap = { + .a_dvp = dvp, + .a_vpp = &vp, + .a_cnp = &cname, + .a_vap = &va + }; + + error = hfs_vnop_create(&ap); + if (error) { + printf("hfs: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + goto out; + } + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + goto out; + } + cp = VTOC(vp); + + /* Don't use non-regular files or files with links. */ + if (!vnode_isreg(vp) || cp->c_linkcount != 1) { + error = EFTYPE; + goto out; + } + + printf("hfs: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN); + + if (VTOF(vp)->ff_size < nodesize) { + caddr_t buffer; + u_int16_t *index; + u_int16_t offset; + BTNodeDescriptor *ndp; + BTHeaderRec *bthp; + HotFilesInfo *hotfileinfo; + int nodecnt; + int filesize; + int entirespernode; + + /* + * Mark it invisible (truncate will pull these changes). 
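 *
 * [Editor's note, illustrative only; not part of the original change:
 *  the code below then hand-builds the b-tree header node: a node
 *  descriptor, the BTHeaderRec, the 128-byte user record (HotFilesInfo)
 *  and a map record, with each record's 16-bit offset stored big-endian
 *  in the last 8 bytes of the node.  Assuming the usual HFS+ sizes
 *  (14-byte BTNodeDescriptor, 106-byte BTHeaderRec) the layout is:
 *
 *      0            BTNodeDescriptor   (kind = kBTHeaderNode, 3 records)
 *      14           BTHeaderRec        (totalNodes, freeNodes, HFC_KEYLENGTH)
 *      120          HotFilesInfo       (magic, version, threshold, tag, ...)
 *      248          map record         (bit for node 0 set: 0x80)
 *      nodesize-8   four 16-bit record offsets, written back to front]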
+ */ + ((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |= + SWAP_BE16 (kIsInvisible + kNameLocked); + + buffer = hfs_mallocz(nodesize); + index = (u_int16_t *)buffer; + + entirespernode = (nodesize - sizeof(BTNodeDescriptor) - 2) / + (sizeof(HotFileKey) + 6); + nodecnt = 2 + howmany(entries * 2, entirespernode); + nodecnt = roundup(nodecnt, 8); + filesize = nodecnt * nodesize; + + /* FILL IN THE NODE DESCRIPTOR: */ + ndp = (BTNodeDescriptor *)buffer; + ndp->kind = kBTHeaderNode; + ndp->numRecords = SWAP_BE16 (3); + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = SWAP_BE16 (offset); + + /* FILL IN THE HEADER RECORD: */ + bthp = (BTHeaderRec *)((u_int8_t *)buffer + offset); + bthp->nodeSize = SWAP_BE16 (nodesize); + bthp->totalNodes = SWAP_BE32 (filesize / nodesize); + bthp->freeNodes = SWAP_BE32 (nodecnt - 1); + bthp->clumpSize = SWAP_BE32 (filesize); + bthp->btreeType = kUserBTreeType; /* non-metadata */ + bthp->attributes |= SWAP_BE32 (kBTBigKeysMask); + bthp->maxKeyLength = SWAP_BE16 (HFC_KEYLENGTH); + offset += sizeof(BTHeaderRec); + index[(nodesize / 2) - 2] = SWAP_BE16 (offset); + + /* FILL IN THE USER RECORD: */ + hotfileinfo = (HotFilesInfo *)((u_int8_t *)buffer + offset); + hotfileinfo->magic = SWAP_BE32 (HFC_MAGIC); + hotfileinfo->version = SWAP_BE32 (HFC_VERSION); + hotfileinfo->duration = SWAP_BE32 (HFC_DEFAULT_DURATION); + hotfileinfo->timebase = 0; + hotfileinfo->timeleft = 0; + hotfileinfo->threshold = SWAP_BE32 (HFC_MINIMUM_TEMPERATURE); + hotfileinfo->maxfileblks = SWAP_BE32 (HFC_MAXIMUM_FILESIZE / HFSTOVCB(hfsmp)->blockSize); + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + if (hfsmp->hfs_hotfile_freeblks == 0) { + hfsmp->hfs_hotfile_freeblks = hfsmp->hfs_hotfile_maxblks; + } + hotfileinfo->usedblocks = SWAP_BE32 (hfsmp->hfs_hotfile_maxblks - hfsmp->hfs_hotfile_freeblks); + } else { + hotfileinfo->maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); + } + strlcpy((char *)hotfileinfo->tag, hfc_tag, + sizeof hotfileinfo->tag); + offset += kBTreeHeaderUserBytes; + index[(nodesize / 2) - 3] = SWAP_BE16 (offset); + + /* FILL IN THE MAP RECORD (only one node in use). */ + *((u_int8_t *)buffer + offset) = 0x80; + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); + index[(nodesize / 2) - 4] = SWAP_BE16 (offset); + + vnode_setnoflush(vp); + error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, ctx); + if (error) { + printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + goto out; + } + cp->c_flag |= C_ZFWANTSYNC; + cp->c_zftimeout = 1; + + if (error == 0) { + struct vnop_write_args args; + uio_t auio; + + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, (uintptr_t)buffer, nodesize); + + args.a_desc = &vnop_write_desc; + args.a_vp = vp; + args.a_uio = auio; + args.a_ioflag = 0; + args.a_context = ctx; + + hfs_unlock(cp); + cp = NULL; + + error = hfs_vnop_write(&args); + if (error) + printf("hfs: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + + uio_free(auio); + } + hfs_free(buffer, nodesize); + } +out: + hfs_end_transaction(hfsmp); + if (dvp) { + vnode_put(dvp); + } + if (vp) { + if (cp) + hfs_unlock(cp); + vnode_recycle(vp); + vnode_put(vp); + } + return (error); +} + +/* + * Compare two hot file b-tree keys. + * + * Result: +n search key > trial key + * 0 search key = trial key + * -n search key < trial key + */ +static int +hfc_comparekeys(HotFileKey *searchKey, HotFileKey *trialKey) +{ + /* + * Compared temperatures first. 
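 * (For example, a key with temperature 100 sorts after one with
 * temperature 80 regardless of file ID; only when temperatures match
 * does the file ID, and then the fork type, break the tie.)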
+ */ + if (searchKey->temperature == trialKey->temperature) { + /* + * Temperatures are equal so compare file ids. + */ + if (searchKey->fileID == trialKey->fileID) { + /* + * File ids are equal so compare fork types. + */ + if (searchKey->forkType == trialKey->forkType) { + return (0); + } else if (searchKey->forkType > trialKey->forkType) { + return (1); + } + } else if (searchKey->fileID > trialKey->fileID) { + return (1); + } + } else if (searchKey->temperature > trialKey->temperature) { + return (1); + } + + return (-1); +} + + +/* + *======================================================================== + * HOT FILE DATA COLLECTING ROUTINES + *======================================================================== + */ + +/* + * Lookup a hot file entry in the tree. + */ +#if HFC_DEBUG +static hotfile_entry_t * +hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) +{ + hotfile_entry_t *entry = hotdata->rootentry; + + while (entry && + entry->temperature != temperature && + entry->fileid != fileid) { + + if (temperature > entry->temperature) + entry = entry->right; + else if (temperature < entry->temperature) + entry = entry->left; + else if (fileid > entry->fileid) + entry = entry->right; + else + entry = entry->left; + } + return (entry); +} +#endif + +/* + * Insert a hot file entry into the tree. + */ +static int +hf_insert(hotfile_data_t *hotdata, hotfile_entry_t *newentry) +{ + hotfile_entry_t *entry = hotdata->rootentry; + u_int32_t fileid = newentry->fileid; + u_int32_t temperature = newentry->temperature; + + if (entry == NULL) { + hotdata->rootentry = newentry; + hotdata->coldest = newentry; + hotdata->activefiles++; + return 0; + } + + while (entry) { + if (temperature > entry->temperature) { + if (entry->right) { + entry = entry->right; + } else { + entry->right = newentry; + break; + } + } else if (temperature < entry->temperature) { + if (entry->left) { + entry = entry->left; + } else { + entry->left = newentry; + break; + } + } else if (fileid > entry->fileid) { + if (entry->right) { + entry = entry->right; + } else { + if (entry->fileid != fileid) + entry->right = newentry; + break; + } + } else { + if (entry->left) { + entry = entry->left; + } else { + if (entry->fileid != fileid) { + entry->left = newentry; + } else { + return EEXIST; + } + break; + } + } + } + + hotdata->activefiles++; + return 0; +} + +/* + * Find the coldest entry in the tree. + */ +static hotfile_entry_t * +hf_coldest(hotfile_data_t *hotdata) +{ + hotfile_entry_t *entry = hotdata->rootentry; + + if (entry) { + while (entry->left) + entry = entry->left; + } + return (entry); +} + +/* + * Find the hottest entry in the tree. + */ +static hotfile_entry_t * +hf_hottest(hotfile_data_t *hotdata) +{ + hotfile_entry_t *entry = hotdata->rootentry; + + if (entry) { + while (entry->right) + entry = entry->right; + } + return (entry); +} + +/* + * Delete a hot file entry from the tree. + */ +static void +hf_delete(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) +{ + hotfile_entry_t *entry, *parent, *next; + + parent = NULL; + entry = hotdata->rootentry; + + while (entry && + entry->temperature != temperature && + entry->fileid != fileid) { + + parent = entry; + if (temperature > entry->temperature) + entry = entry->right; + else if (temperature < entry->temperature) + entry = entry->left; + else if (fileid > entry->fileid) + entry = entry->right; + else + entry = entry->left; + } + + if (entry) { + /* + * Reorganize the sub-trees spanning from our entry. 
+ */ + if ((next = entry->right)) { + hotfile_entry_t *pnextl, *psub; + /* + * Tree pruning: take the left branch of the + * current entry and place it at the lowest + * left branch of the current right branch + */ + psub = next; + + /* Walk the Right/Left sub tree from current entry */ + while ((pnextl = psub->left)) + psub = pnextl; + + /* Plug the old left tree to the new ->Right leftmost entry */ + psub->left = entry->left; + + } else /* only left sub-tree, simple case */ { + next = entry->left; + } + /* + * Now, plug the current entry sub tree to + * the good pointer of our parent entry. + */ + if (parent == NULL) + hotdata->rootentry = next; + else if (parent->left == entry) + parent->left = next; + else + parent->right = next; + + /* Place entry back on the free-list */ + entry->left = 0; + entry->fileid = 0; + entry->temperature = 0; + + entry->right = hotdata->freelist; + hotdata->freelist = entry; + hotdata->activefiles--; + + if (hotdata->coldest == entry || hotdata->coldest == NULL) { + hotdata->coldest = hf_coldest(hotdata); + } + + } +} + +/* + * Get a free hot file entry. + */ +static hotfile_entry_t * +hf_getnewentry(hotfile_data_t *hotdata) +{ + hotfile_entry_t * entry; + + /* + * When the free list is empty then steal the coldest one + */ + if (hotdata->freelist == NULL) { + entry = hf_coldest(hotdata); + hf_delete(hotdata, entry->fileid, entry->temperature); + } + entry = hotdata->freelist; + hotdata->freelist = entry->right; + entry->right = 0; + + return (entry); +} + + +/* + * Generate a sorted list of hot files (hottest to coldest). + * + * As a side effect, every node in the hot file tree will be + * deleted (moved to the free list). + */ +static void +hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist) +{ + int i = 0; + hotfile_entry_t *entry; + + while ((entry = hf_hottest(hotdata)) != NULL) { + sortedlist->hfl_hotfile[i].hf_fileid = entry->fileid; + sortedlist->hfl_hotfile[i].hf_temperature = entry->temperature; + sortedlist->hfl_hotfile[i].hf_blocks = entry->blocks; + sortedlist->hfl_totalblocks += entry->blocks; + ++i; + + hf_delete(hotdata, entry->fileid, entry->temperature); + } + + sortedlist->hfl_count = i; + +#if HFC_VERBOSE + printf("hfs: hf_getsortedlist returning %d entries w/%d total blocks\n", i, sortedlist->hfl_totalblocks); +#endif +} + + +#if HFC_DEBUG +static void +hf_maxdepth(hotfile_entry_t * root, int depth, int *maxdepth) +{ + if (root) { + depth++; + if (depth > *maxdepth) + *maxdepth = depth; + hf_maxdepth(root->left, depth, maxdepth); + hf_maxdepth(root->right, depth, maxdepth); + } +} + +static void +hf_printtree(hotfile_entry_t * root) +{ + if (root) { + hf_printtree(root->left); + printf("hfs: temperature: % 8d, fileid %d\n", root->temperature, root->fileid); + hf_printtree(root->right); + } +} +#endif diff --git a/core/hfs_hotfiles.h b/core/hfs_hotfiles.h new file mode 100644 index 0000000..0ce32da --- /dev/null +++ b/core/hfs_hotfiles.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2003-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef __HFS_HOTFILES__ +#define __HFS_HOTFILES__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + + +#define HFC_FILENAME ".hotfiles.btree" + + +/* + * Temperature measurement constraints. + */ +#define HFC_DEFAULT_FILE_COUNT hfc_default_file_count +#define HFC_DEFAULT_DURATION hfc_default_duration +#define HFC_CUMULATIVE_CYCLES 3 +#define HFC_MAXIMUM_FILE_COUNT hfc_max_file_count +#define HFC_MAXIMUM_FILESIZE hfc_max_file_size +#define HFC_MINIMUM_TEMPERATURE 24 + + +/* + * Sync constraints. + */ +#define HFC_BLKSPERSYNC 300 +#define HFC_FILESPERSYNC 50 + + +/* + * Hot file clustering stages. + */ +enum hfc_stage { + HFC_DISABLED, + HFC_IDLE, + HFC_BUSY, + HFC_RECORDING, + HFC_EVALUATION, + HFC_EVICTION, + HFC_ADOPTION, +}; + + +/* + * B-tree file key format (on-disk). + */ +struct HotFileKey { + u_int16_t keyLength; /* length of key, excluding this field */ + u_int8_t forkType; /* 0 = data fork, FF = resource fork */ + u_int8_t pad; /* make the other fields align on 32-bit boundary */ + u_int32_t temperature; /* temperature recorded */ + u_int32_t fileID; /* file ID */ +}; +typedef struct HotFileKey HotFileKey; + +#define HFC_LOOKUPTAG 0xFFFFFFFF +#define HFC_KEYLENGTH (sizeof(HotFileKey) - sizeof(u_int16_t)) + +/* + * B-tree header node user info (on-disk). + */ +struct HotFilesInfo { + u_int32_t magic; + u_int32_t version; + u_int32_t duration; /* duration of sample period (secs) */ + u_int32_t timebase; /* start of recording period (GMT time in secs) */ + u_int32_t timeleft; /* time remaining in recording period (secs) */ + u_int32_t threshold; + u_int32_t maxfileblks; + union { + u_int32_t _maxfilecnt; // on hdd's we track the max # of files + u_int32_t _usedblocks; // on ssd's we track how many blocks are used + } _u; + u_int8_t tag[32]; +}; + +#define usedblocks _u._usedblocks +#define maxfilecnt _u._maxfilecnt + +typedef struct HotFilesInfo HotFilesInfo; + +#define HFC_MAGIC 0xFF28FF26 +#define HFC_VERSION 1 + + +struct hfsmount; +struct proc; +struct vnode; + +/* + * Hot File interface functions. 
+ */ +int hfs_hotfilesync (struct hfsmount *, vfs_context_t ctx); + +int hfs_recording_init(struct hfsmount *); +int hfs_recording_suspend (struct hfsmount *); + +int hfs_addhotfile (struct vnode *); +int hfs_removehotfile (struct vnode *); +int hfs_hotfile_deleted(struct vnode *vp); // called when a file is deleted +void hfs_repin_hotfiles(struct hfsmount *); + +// call this to adjust the number of used hotfile blocks either up/down +int hfs_hotfile_adjust_blocks(struct vnode *vp, int64_t num_blocks); + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __HFS_HOTFILES__ */ diff --git a/core/hfs_iokit.cpp b/core/hfs_iokit.cpp new file mode 100644 index 0000000..5908364 --- /dev/null +++ b/core/hfs_iokit.cpp @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "hfs_iokit.h" +#include "hfs.h" +#include "hfs_dbg.h" + +#ifndef panic_on_assert +bool panic_on_assert; +#endif + +#if DEBUG +bool hfs_corruption_panics = true; +#endif + +class com_apple_filesystems_hfs : public IOService { + OSDeclareDefaultStructors(com_apple_filesystems_hfs) + +public: + + bool start(IOService *provider) override; + void stop(IOService *provider) override; + +protected: + vfstable_t vfs_handle; +}; + +#define super IOService +OSDefineMetaClassAndStructors(com_apple_filesystems_hfs, IOService) + +extern struct vnodeopv_desc hfs_vnodeop_opv_desc; +#if CONFIG_HFS_STD +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +#endif +extern struct vnodeopv_desc hfs_specop_opv_desc; +extern struct vnodeopv_desc hfs_fifoop_opv_desc; +extern struct vfsops hfs_vfsops; + +bool com_apple_filesystems_hfs::start(IOService *provider) +{ + if (!super::start(provider)) + return false; + +#ifndef panic_on_assert + panic_on_assert = PE_i_can_has_kernel_configuration() & kPEICanHasAssertions; +#endif + +#if DEBUG + PE_parse_boot_argn("hfs_corruption_panics", &hfs_corruption_panics, sizeof(hfs_corruption_panics)); +#endif + + struct vnodeopv_desc *op_descs[] = { + &hfs_vnodeop_opv_desc, +#if CONFIG_HFS_STD + &hfs_std_vnodeop_opv_desc, +#endif + &hfs_specop_opv_desc, +#if FIFO + &hfs_fifoop_opv_desc, +#endif + }; + +#define lengthof(x) (sizeof(x)/sizeof(*x)) + +#ifndef VFS_TBLVNOP_SECLUDE_RENAME +#define VFS_TBLVNOP_SECLUDE_RENAME 0 +#endif + + struct vfs_fsentry vfe = { + .vfe_vfsops = &hfs_vfsops, + .vfe_vopcnt = lengthof(op_descs), + .vfe_opvdescs = op_descs, + .vfe_fsname = "hfs", + .vfe_flags = (VFS_TBLNOTYPENUM | VFS_TBLLOCALVOL | VFS_TBLREADDIR_EXTENDED + | VFS_TBL64BITREADY | VFS_TBLVNOP_PAGEOUTV2 | VFS_TBLVNOP_PAGEINV2 + | VFS_TBLTHREADSAFE | VFS_TBLCANMOUNTROOT | VFS_TBLVNOP_SECLUDE_RENAME + | VFS_TBLNATIVEXATTR) + }; + + int ret = vfs_fsadd(&vfe, &vfs_handle); + + if (ret) { + printf("hfs: vfs_fsadd failed: %d!\n", ret); + vfs_handle = NULL; + return false; + } + + hfs_init_zones(); + + hfs_sysctl_register(); + + return true; +} + +void com_apple_filesystems_hfs::stop(IOService *provider) +{ + if (vfs_handle) { + vfs_fsremove(vfs_handle); + hfs_sysctl_unregister(); + vfs_handle = NULL; + } + + super::stop(provider); +} + +int hfs_is_ejectable(const char *cdev_name) +{ + int ret = 0; + OSDictionary *dictionary; + OSString *dev_name; + + if (strncmp(cdev_name, "/dev/", 5) == 0) { + cdev_name += 5; + } + + dictionary = IOService::serviceMatching("IOMedia"); + if( dictionary ) { + dev_name = OSString::withCString( cdev_name ); + if( dev_name ) { + IOService *service; + mach_timespec_t tv = { 5, 0 }; // wait up to "timeout" seconds for the device + + dictionary->setObject(kIOBSDNameKey, dev_name); + dictionary->retain(); + service = IOService::waitForService(dictionary, &tv); + if( service ) { + OSBoolean *ejectable = (OSBoolean *)service->getProperty("Ejectable"); + + if( ejectable ) { + ret = (int)ejectable->getValue(); + } + + } + dev_name->release(); + } + dictionary->release(); + } + + return ret; +} + +void hfs_iterate_media_with_content(const char *content_uuid_cstring, + int (*func)(const char *device, + const char *uuid_str, + void *arg), + void *arg) +{ + OSDictionary *dictionary; + OSString *content_uuid_string; + + dictionary = IOService::serviceMatching("IOMedia"); + if (dictionary) { + content_uuid_string = 
OSString::withCString(content_uuid_cstring); + if (content_uuid_string) { + IOService *service; + OSIterator *iter; + + dictionary->setObject("Content", content_uuid_string); + dictionary->retain(); + + iter = IOService::getMatchingServices(dictionary); + while (iter && (service = (IOService *)iter->getNextObject())) { + if (service) { + OSString *iostr = (OSString *) service->getProperty(kIOBSDNameKey); + OSString *uuidstr = (OSString *)service->getProperty("UUID"); + const char *uuid; + + if (iostr) { + if (uuidstr) { + uuid = uuidstr->getCStringNoCopy(); + } else { + uuid = "00000000-0000-0000-0000-000000000000"; + } + + if (!func(iostr->getCStringNoCopy(), uuid, arg)) + break; + } + } + } + if (iter) + iter->release(); + + content_uuid_string->release(); + } + dictionary->release(); + } +} + +kern_return_t hfs_get_platform_serial_number(char *serial_number_str, + uint32_t len) +{ + OSDictionary * platform_dict; + IOService *platform; + OSString * string; + + if (len < 1) { + return 0; + } + serial_number_str[0] = '\0'; + + platform_dict = IOService::serviceMatching( "IOPlatformExpertDevice" ); + if (platform_dict == NULL) { + return KERN_NOT_SUPPORTED; + } + + platform = IOService::waitForService( platform_dict ); + if (platform) { + string = (OSString *)platform->getProperty(kIOPlatformSerialNumberKey); + if (string == 0) { + return KERN_NOT_SUPPORTED; + } else { + strlcpy( serial_number_str, string->getCStringNoCopy(), len); + } + } + + return KERN_SUCCESS; +} + +// Interface with AKS + +static aks_file_system_key_services_t * +key_services(void) +{ + static aks_file_system_key_services_t *g_key_services; + + if (!g_key_services) { + IOService *platform = IOService::getPlatform(); + if (platform) { + IOReturn ret = platform->callPlatformFunction + (kAKSFileSystemKeyServices, true, &g_key_services, NULL, NULL, NULL); + if (ret) + printf("hfs: unable to get " kAKSFileSystemKeyServices " (0x%x)\n", ret); + } + } + + return g_key_services; +} + +int hfs_unwrap_key(aks_cred_t access, const aks_wrapped_key_t wrapped_key_in, + aks_raw_key_t key_out) +{ + aks_file_system_key_services_t *ks = key_services(); + if (!ks || !ks->unwrap_key) + return ENXIO; + return ks->unwrap_key(access, wrapped_key_in, key_out); +} + +int hfs_rewrap_key(aks_cred_t access, cp_key_class_t dp_class, + const aks_wrapped_key_t wrapped_key_in, + aks_wrapped_key_t wrapped_key_out) +{ + aks_file_system_key_services_t *ks = key_services(); + if (!ks || !ks->rewrap_key) + return ENXIO; + return ks->rewrap_key(access, dp_class, wrapped_key_in, wrapped_key_out); +} + +int hfs_new_key(aks_cred_t access, cp_key_class_t dp_class, + aks_raw_key_t key_out, aks_wrapped_key_t wrapped_key_out) +{ + aks_file_system_key_services_t *ks = key_services(); + if (!ks || !ks->new_key) + return ENXIO; + return ks->new_key(access, dp_class, key_out, wrapped_key_out); +} + +int hfs_backup_key(aks_cred_t access, const aks_wrapped_key_t wrapped_key_in, + aks_wrapped_key_t wrapped_key_out) +{ + aks_file_system_key_services_t *ks = key_services(); + if (!ks || !ks->backup_key) + return ENXIO; + return ks->backup_key(access, wrapped_key_in, wrapped_key_out); +} diff --git a/core/hfs_iokit.h b/core/hfs_iokit.h new file mode 100644 index 0000000..d31a062 --- /dev/null +++ b/core/hfs_iokit.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef hfs_iokit_h +#define hfs_iokit_h + +#include +#include + +__BEGIN_DECLS + +int hfs_is_ejectable(const char *cdev_name); +void hfs_iterate_media_with_content(const char *content_uuid_cstring, + int (*func)(const char *bsd_name, + const char *uuid_str, + void *arg), + void *arg); +kern_return_t hfs_get_platform_serial_number(char *serial_number_str, + uint32_t len); +int hfs_unwrap_key(aks_cred_t access, const aks_wrapped_key_t wrapped_key_in, + aks_raw_key_t key_out); +int hfs_rewrap_key(aks_cred_t access, cp_key_class_t dp_class, + const aks_wrapped_key_t wrapped_key_in, + aks_wrapped_key_t wrapped_key_out); +int hfs_new_key(aks_cred_t access, cp_key_class_t dp_class, + aks_raw_key_t key_out, aks_wrapped_key_t wrapped_key_out); +int hfs_backup_key(aks_cred_t access, const aks_wrapped_key_t wrapped_key_in, + aks_wrapped_key_t wrapped_key_out); + +__END_DECLS + +#endif /* hfs_iokit_h */ diff --git a/core/hfs_journal.c b/core/hfs_journal.c new file mode 100644 index 0000000..f1e6ee6 --- /dev/null +++ b/core/hfs_journal.c @@ -0,0 +1,4892 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +// +// This file implements a simple write-ahead journaling layer. +// In theory any file system can make use of it by calling these +// functions when the fs wants to modify meta-data blocks. See +// hfs_journal.h for a more detailed description of the api and +// data structures. +// +// Dominic Giampaolo (dbg@apple.com) +// + +#ifdef KERNEL + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* OSAddAtomic */ + +#include "hfs.h" + +kern_return_t thread_terminate(thread_t); + +/* + * Set sysctl vfs.generic.jnl.kdebug.trim=1 to enable KERNEL_DEBUG_CONSTANT + * logging of trim-related calls within the journal. (They're + * disabled by default because there can be a lot of these events, + * and we don't want to overwhelm the kernel debug buffer. If you + * want to watch these events in particular, just set the sysctl.) + */ +static int jnl_kdebug = 0; + +HFS_SYSCTL(NODE, _vfs_generic_hfs, OID_AUTO, jnl, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Journal") +HFS_SYSCTL(NODE, _vfs_generic_hfs_jnl, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Journal kdebug") +HFS_SYSCTL(INT, _vfs_generic_hfs_jnl_kdebug, OID_AUTO, trim, CTLFLAG_RW|CTLFLAG_LOCKED, &jnl_kdebug, 0, "Enable kdebug logging for journal TRIM") + +#define DBG_JOURNAL_FLUSH FSDBG_CODE(DBG_JOURNAL, 1) +#define DBG_JOURNAL_TRIM_ADD FSDBG_CODE(DBG_JOURNAL, 2) +#define DBG_JOURNAL_TRIM_REMOVE FSDBG_CODE(DBG_JOURNAL, 3) +#define DBG_JOURNAL_TRIM_REMOVE_PENDING FSDBG_CODE(DBG_JOURNAL, 4) +#define DBG_JOURNAL_TRIM_REALLOC FSDBG_CODE(DBG_JOURNAL, 5) +#define DBG_JOURNAL_TRIM_FLUSH FSDBG_CODE(DBG_JOURNAL, 6) +#define DBG_JOURNAL_TRIM_UNMAP FSDBG_CODE(DBG_JOURNAL, 7) + +/* + * Cap the journal max size to 2GB. On HFS, it will attempt to occupy + * a full allocation block if the current size is smaller than the allocation + * block on which it resides. Once we hit the exabyte filesystem range, then + * it will use 2GB allocation blocks. As a result, make the cap 2GB. + */ +#define MAX_JOURNAL_SIZE 0x80000000U + +#include +#else + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "compat.h" + +#endif /* KERNEL */ + +#include "hfs_journal.h" + +#include + +// +// By default, we grow the list of extents to trim by 4K at a time. +// We'll opt to flush a transaction if it contains at least +// JOURNAL_FLUSH_TRIM_EXTENTS extents to be trimmed (even if the number +// of modified blocks is small). 
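// Worked example, assuming dk_extent_t is a pair of 64-bit fields
// (offset, length), i.e. 16 bytes: JOURNAL_DEFAULT_TRIM_EXTENTS works
// out to 4096 / 16 = 256 extents per growth step, and
// JOURNAL_FLUSH_TRIM_EXTENTS to 256 * 15 / 16 = 240, so a transaction
// that has accumulated roughly 240 pending trim extents gets flushed
// even when it has dirtied only a few metadata blocks.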
+// +enum { + JOURNAL_DEFAULT_TRIM_BYTES = 4096, + JOURNAL_DEFAULT_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_BYTES / sizeof(dk_extent_t), + JOURNAL_FLUSH_TRIM_EXTENTS = JOURNAL_DEFAULT_TRIM_EXTENTS * 15 / 16 +}; + +unsigned int jnl_trim_flush_limit = JOURNAL_FLUSH_TRIM_EXTENTS; + +HFS_SYSCTL(UINT, _vfs_generic_hfs_jnl, OID_AUTO, trim_flush, CTLFLAG_RW, &jnl_trim_flush_limit, 0, "number of trimmed extents to cause a journal flush") + +// number of bytes to checksum in a block_list_header +// NOTE: this should be enough to clear out the header +// fields as well as the first entry of binfo[] +#define BLHDR_CHECKSUM_SIZE 32 + +static void lock_condition(journal *jnl, boolean_t *condition, const char *condition_name); +static void wait_condition(journal *jnl, boolean_t *condition, const char *condition_name); +static void unlock_condition(journal *jnl, boolean_t *condition); +static void finish_end_thread(transaction *tr); +static void write_header_thread(journal *jnl); +static int finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callback_arg); +static int end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg, boolean_t drop_lock, boolean_t must_wait); +static void abort_transaction(journal *jnl, transaction *tr); +static void dump_journal(journal *jnl); + +static __inline__ void lock_oldstart(journal *jnl); +static __inline__ void unlock_oldstart(journal *jnl); +static __inline__ void lock_flush(journal *jnl); +static __inline__ void unlock_flush(journal *jnl); + + +// +// 3105942 - Coalesce writes to the same block on journal replay +// + +typedef struct bucket { + off_t block_num; + uint32_t jnl_offset; + uint32_t block_size; + int32_t cksum; +} bucket; + +#define STARTING_BUCKETS 256 + +static int add_block(journal *jnl, struct bucket **buf_ptr, off_t block_num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr); +static int grow_table(struct bucket **buf_ptr, int num_buckets, int new_size); +static int lookup_bucket(struct bucket **buf_ptr, off_t block_num, int num_full); +static int do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr); +static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr, int overwriting); + +#define CHECK_JOURNAL(jnl) \ + do { \ + if (jnl == NULL) { \ + panic("%s:%d: null journal ptr?\n", __FILE__, __LINE__); \ + } \ + if (jnl->jdev == NULL) { \ + panic("%s:%d: jdev is null!\n", __FILE__, __LINE__); \ + } \ + if (jnl->fsdev == NULL) { \ + panic("%s:%d: fsdev is null!\n", __FILE__, __LINE__); \ + } \ + if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC) { \ + panic("%s:%d: jhdr magic corrupted (0x%x != 0x%x)\n", \ + __FILE__, __LINE__, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC); \ + } \ + if ( jnl->jhdr->start <= 0 \ + || jnl->jhdr->start > jnl->jhdr->size) { \ + panic("%s:%d: jhdr start looks bad (0x%llx max size 0x%llx)\n", \ + __FILE__, __LINE__, jnl->jhdr->start, jnl->jhdr->size); \ + } \ + if ( jnl->jhdr->end <= 0 \ + || jnl->jhdr->end > jnl->jhdr->size) { \ + panic("%s:%d: jhdr end looks bad (0x%llx max size 0x%llx)\n", \ + __FILE__, __LINE__, jnl->jhdr->end, jnl->jhdr->size); \ + } \ + } while(0) + +#define CHECK_TRANSACTION(tr) \ + do { \ + if (tr == NULL) { \ + panic("%s:%d: null transaction ptr?\n", __FILE__, __LINE__); \ + } \ + if (tr->jnl == NULL) 
{ \ + panic("%s:%d: null tr->jnl ptr?\n", __FILE__, __LINE__); \ + } \ + if (tr->blhdr != (block_list_header *)tr->tbuffer) { \ + panic("%s:%d: blhdr (%p) != tbuffer (%p)\n", __FILE__, __LINE__, tr->blhdr, tr->tbuffer); \ + } \ + if (tr->total_bytes < 0) { \ + panic("%s:%d: tr total_bytes looks bad: %d\n", __FILE__, __LINE__, tr->total_bytes); \ + } \ + if (tr->journal_start < 0) { \ + panic("%s:%d: tr journal start looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_start); \ + } \ + if (tr->journal_end < 0) { \ + panic("%s:%d: tr journal end looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_end); \ + } \ + if (tr->blhdr && (tr->blhdr->max_blocks <= 0 || tr->blhdr->max_blocks > (tr->jnl->jhdr->size/tr->jnl->jhdr->jhdr_size))) { \ + panic("%s:%d: tr blhdr max_blocks looks bad: %d\n", __FILE__, __LINE__, tr->blhdr->max_blocks); \ + } \ + } while(0) + + + +// +// this isn't a great checksum routine but it will do for now. +// we use it to checksum the journal header and the block list +// headers that are at the start of each transaction. +// +static unsigned int +calc_checksum(const char *ptr, int len) +{ + int i; + unsigned int cksum=0; + + // this is a lame checksum but for now it'll do + for(i = 0; i < len; i++, ptr++) { + cksum = (cksum << 8) ^ (cksum + *(unsigned char *)ptr); + } + + return (~cksum); +} + +// +// Journal Locking +// +lck_grp_attr_t * jnl_group_attr; +lck_attr_t * jnl_lock_attr; +lck_grp_t * jnl_mutex_group; + +void +journal_init(void) +{ + jnl_lock_attr = lck_attr_alloc_init(); + jnl_group_attr = lck_grp_attr_alloc_init(); + jnl_mutex_group = lck_grp_alloc_init("jnl-mutex", jnl_group_attr); +} + +__inline__ void +journal_lock(journal *jnl) +{ + lck_mtx_lock(&jnl->jlock); + if (jnl->owner) { + panic ("jnl: owner is %p, expected NULL\n", jnl->owner); + } + jnl->owner = current_thread(); +} + +__inline__ void +journal_unlock(journal *jnl) +{ + jnl->owner = NULL; + lck_mtx_unlock(&jnl->jlock); +} + +static __inline__ void +lock_flush(journal *jnl) +{ + lck_mtx_lock(&jnl->flock); +} + +static __inline__ void +unlock_flush(journal *jnl) +{ + lck_mtx_unlock(&jnl->flock); +} + +static __inline__ void +lock_oldstart(journal *jnl) +{ + lck_mtx_lock(&jnl->old_start_lock); +} + +static __inline__ void +unlock_oldstart(journal *jnl) +{ + lck_mtx_unlock(&jnl->old_start_lock); +} + + + +#define JNL_WRITE 0x0001 +#define JNL_READ 0x0002 +#define JNL_HEADER 0x8000 + +// +// This function sets up a fake buf and passes it directly to the +// journal device strategy routine (so that it won't get cached in +// the block cache. 
+// +// It also handles range checking the i/o so that we don't write +// outside the journal boundaries and it will wrap the i/o back +// to the beginning if necessary (skipping over the journal header) +// +static size_t +do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction) +{ + int err; + off_t curlen = len; + size_t io_sz = 0; + buf_t bp; + off_t max_iosize; + bufattr_t bap; + boolean_t was_vm_privileged = FALSE; + boolean_t need_vm_privilege = FALSE; + + if (vfs_isswapmount(jnl->fsmount)) + need_vm_privilege = TRUE; + + if (*offset < 0 || *offset > jnl->jhdr->size) { + panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size); + } + + if (direction & JNL_WRITE) + max_iosize = jnl->max_write_size; + else if (direction & JNL_READ) + max_iosize = jnl->max_read_size; + else + max_iosize = 128 * 1024; + +again: + bp = buf_alloc(jnl->jdev); + + if (*offset + curlen > jnl->jhdr->size && *offset != 0 && jnl->jhdr->size != 0) { + if (*offset == jnl->jhdr->size) { + *offset = jnl->jhdr->jhdr_size; + } else { + curlen = jnl->jhdr->size - *offset; + } + } + + if (curlen > max_iosize) { + curlen = max_iosize; + } + + if (curlen <= 0) { + panic("jnl: do_jnl_io: curlen == %lld, offset 0x%llx len %zd\n", curlen, *offset, len); + } + + if (*offset == 0 && (direction & JNL_HEADER) == 0) { + panic("jnl: request for i/o to jnl-header without JNL_HEADER flag set! (len %lld, data %p)\n", curlen, data); + } + + /* + * As alluded to in the block comment at the top of the function, we use a "fake" iobuf + * here and issue directly to the disk device that the journal protects since we don't + * want this to enter the block cache. As a result, we lose the ability to mark it + * as a metadata buf_t for the layers below us that may care. If we were to + * simply attach the B_META flag into the b_flags this may confuse things further + * since this is an iobuf, not a metadata buffer. + * + * To address this, we use the extended bufattr struct embedded in the bp. + * Explicitly mark the buf here as a metadata buffer in its bufattr flags. + */ + bap = buf_attr(bp); + bufattr_markmeta(bap); + + if (direction & JNL_READ) + buf_setflags(bp, B_READ); + else { + /* + * don't have to set any flags + */ + vnode_startwrite(jnl->jdev); + } + buf_setsize(bp, curlen); + buf_setcount(bp, curlen); + buf_setdataptr(bp, (uintptr_t)data); + buf_setblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size)); + buf_setlblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size)); + + if ((direction & JNL_WRITE) && (jnl->flags & JOURNAL_DO_FUA_WRITES)) { + buf_markfua(bp); + } + + if (need_vm_privilege == TRUE) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + DTRACE_IO1(journal__start, buf_t, bp); + err = VNOP_STRATEGY(bp); + if (!err) { + err = (int)buf_biowait(bp); + } + DTRACE_IO1(journal__done, buf_t, bp); + + if (need_vm_privilege == TRUE && was_vm_privileged == FALSE) + set_vm_privilege(FALSE); + + buf_free(bp); + + if (err) { + printf("jnl: %s: do_jnl_io: strategy err 0x%x\n", jnl->jdev_name, err); + return 0; + } + + *offset += curlen; + io_sz += curlen; + + if (io_sz != len) { + // handle wrap-around + data = (char *)data + curlen; + curlen = len - io_sz; + if (*offset >= jnl->jhdr->size) { + *offset = jnl->jhdr->jhdr_size; + } + goto again; + } + + return io_sz; +} + +static size_t +read_journal_data(journal *jnl, off_t *offset, void *data, size_t len) +{ + return do_journal_io(jnl, offset, data, len, JNL_READ); +} + +static size_t +write_journal_data(journal *jnl, off_t *offset, void *data, size_t len) +{ + return do_journal_io(jnl, offset, data, len, JNL_WRITE); +} + + +static size_t +read_journal_header(journal *jnl, void *data, size_t len) +{ + off_t hdr_offset = 0; + + return do_journal_io(jnl, &hdr_offset, data, len, JNL_READ|JNL_HEADER); +} + +static int +write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num) +{ + static int num_err_prints = 0; + int ret=0; + off_t jhdr_offset = 0; + // + // Flush the track cache if we're not doing force-unit-access + // writes. + // + if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { + + dk_synchronize_t sync_request = { + .options = DK_SYNCHRONIZE_OPTION_BARRIER, + }; + + /* + * If device doesn't support barrier-only flush, or + * the journal is on a different device, use full flush. + */ + if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) { + sync_request.options = 0; + jnl->flush_counter++; + } + + ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + } + if (ret != 0) { + // + // Only print this error if it's a different error than the + // previous one, or if it's the first time for this device + // or if the total number of printfs is less than 25. We + // allow for up to 25 printfs to insure that some make it + // into the on-disk syslog. Otherwise if we only printed + // one, it's possible it would never make it to the syslog + // for the root volume and that makes debugging hard. + // + if ( ret != jnl->last_flush_err + || (jnl->flags & JOURNAL_FLUSHCACHE_ERR) == 0 + || num_err_prints++ < 25) { + + printf("jnl: %s: flushing fs disk buffer returned 0x%x\n", jnl->jdev_name, ret); + + jnl->flags |= JOURNAL_FLUSHCACHE_ERR; + jnl->last_flush_err = ret; + } + } + + jnl->jhdr->sequence_num = sequence_num; + jnl->jhdr->checksum = 0; + jnl->jhdr->checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE); + + if (do_journal_io(jnl, &jhdr_offset, jnl->header_buf, jnl->jhdr->jhdr_size, JNL_WRITE|JNL_HEADER) != (size_t)jnl->jhdr->jhdr_size) { + printf("jnl: %s: write_journal_header: error writing the journal header!\n", jnl->jdev_name); + jnl->flags |= JOURNAL_INVALID; + return -1; + } + + // If we're not doing force-unit-access writes, then we + // have to flush after writing the journal header so that + // a future transaction doesn't sneak out to disk before + // the header does and thus overwrite data that the old + // journal header refers to. 
Saw this exact case happen + // on an IDE bus analyzer with Larry Barras so while it + // may seem obscure, it's not. + // + if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { + + dk_synchronize_t sync_request = { + .options = DK_SYNCHRONIZE_OPTION_BARRIER, + }; + + /* + * If device doesn't support barrier-only flush, or + * the journal is on a different device, use full flush. + */ + if (!(jnl->flags & JOURNAL_FEATURE_BARRIER) || (jnl->jdev != jnl->fsdev)) { + sync_request.options = 0; + jnl->flush_counter++; + } + + VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + } + + return 0; +} + + + +// +// this is a work function used to free up transactions that +// completed. they can't be free'd from buffer_flushed_callback +// because it is called from deep with the disk driver stack +// and thus can't do something that would potentially cause +// paging. it gets called by each of the journal api entry +// points so stuff shouldn't hang around for too long. +// +static void +free_old_stuff(journal *jnl) +{ + transaction *tr, *next; + block_list_header *blhdr=NULL, *next_blhdr=NULL; + + if (jnl->tr_freeme == NULL) + return; + + lock_oldstart(jnl); + tr = jnl->tr_freeme; + jnl->tr_freeme = NULL; + unlock_oldstart(jnl); + + for(; tr; tr=next) { + for (blhdr = tr->blhdr; blhdr; blhdr = next_blhdr) { + next_blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum); + blhdr->binfo[0].bnum = 0xdeadc0de; + + hfs_free(blhdr, tr->tbuffer_size); + + KERNEL_DEBUG(0xbbbbc01c, jnl, tr, tr->tbuffer_size, 0, 0); + } + next = tr->next; + hfs_free(tr, sizeof(*tr)); + } +} + + + +// +// This is our callback that lets us know when a buffer has been +// flushed to disk. It's called from deep within the driver stack +// and thus is quite limited in what it can do. Notably, it can +// not initiate any new i/o's or allocate/free memory. +// +static void +buffer_flushed_callback(struct buf *bp, void *arg) +{ + transaction *tr; + journal *jnl; + transaction *ctr, *prev=NULL, *next; + size_t i; + int bufsize, amt_flushed, total_bytes; + + + //printf("jnl: buf flush: bp @ 0x%x l/blkno %qd/%qd vp 0x%x tr @ 0x%x\n", + // bp, buf_lblkno(bp), buf_blkno(bp), buf_vnode(bp), arg); + + // snarf out the bits we want + bufsize = buf_size(bp); + tr = (transaction *)arg; + + // then we've already seen it + if (tr == NULL) { + return; + } + + CHECK_TRANSACTION(tr); + + jnl = tr->jnl; + + CHECK_JOURNAL(jnl); + + amt_flushed = tr->num_killed; + total_bytes = tr->total_bytes; + + // update the number of blocks that have been flushed. + // this buf may represent more than one block so take + // that into account. + // + // OSAddAtomic() returns the value of tr->num_flushed before the add + // + amt_flushed += OSAddAtomic(bufsize, &tr->num_flushed); + + + // if this transaction isn't done yet, just return as + // there is nothing to do. + // + // NOTE: we are careful to not reference anything through + // the tr pointer after doing the OSAddAtomic(). if + // this if statement fails then we are the last one + // and then it's ok to dereference "tr". + // + if ((amt_flushed + bufsize) < total_bytes) { + return; + } + + // this will single thread checking the transaction + lock_oldstart(jnl); + + if (tr->total_bytes == (int)0xfbadc0de) { + // then someone beat us to it... 
+ unlock_oldstart(jnl); + return; + } + + // mark this so that we're the owner of dealing with the + // cleanup for this transaction + tr->total_bytes = 0xfbadc0de; + + if (jnl->flags & JOURNAL_INVALID) + goto transaction_done; + + //printf("jnl: tr 0x%x (0x%llx 0x%llx) in jnl 0x%x completed.\n", + // tr, tr->journal_start, tr->journal_end, jnl); + + // find this entry in the old_start[] index and mark it completed + for(i = 0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) { + + if ((off_t)(jnl->old_start[i] & ~(0x8000000000000000ULL)) == tr->journal_start) { + jnl->old_start[i] &= ~(0x8000000000000000ULL); + break; + } + } + + if (i >= sizeof(jnl->old_start)/sizeof(jnl->old_start[0])) { + panic("jnl: buffer_flushed: did not find tr w/start @ %lld (tr %p, jnl %p)\n", + tr->journal_start, tr, jnl); + } + + + // if we are here then we need to update the journal header + // to reflect that this transaction is complete + if (tr->journal_start == jnl->active_start) { + jnl->active_start = tr->journal_end; + tr->journal_start = tr->journal_end = (off_t)0; + } + + // go through the completed_trs list and try to coalesce + // entries, restarting back at the beginning if we have to. + for (ctr = jnl->completed_trs; ctr; prev=ctr, ctr=next) { + if (ctr->journal_start == jnl->active_start) { + jnl->active_start = ctr->journal_end; + if (prev) { + prev->next = ctr->next; + } + if (ctr == jnl->completed_trs) { + jnl->completed_trs = ctr->next; + } + + next = jnl->completed_trs; // this starts us over again + ctr->next = jnl->tr_freeme; + jnl->tr_freeme = ctr; + ctr = NULL; + } else if (tr->journal_end == ctr->journal_start) { + ctr->journal_start = tr->journal_start; + next = jnl->completed_trs; // this starts us over again + ctr = NULL; + tr->journal_start = tr->journal_end = (off_t)0; + } else if (tr->journal_start == ctr->journal_end) { + ctr->journal_end = tr->journal_end; + next = ctr->next; + tr->journal_start = tr->journal_end = (off_t)0; + } else if (ctr->next && ctr->journal_end == ctr->next->journal_start) { + // coalesce the next entry with this one and link the next + // entry in at the head of the tr_freeme list + next = ctr->next; // temporarily use the "next" variable + ctr->journal_end = next->journal_end; + ctr->next = next->next; + next->next = jnl->tr_freeme; // link in the next guy at the head of the tr_freeme list + jnl->tr_freeme = next; + + next = jnl->completed_trs; // this starts us over again + ctr = NULL; + } else { + next = ctr->next; + } + } + + // if this is true then we didn't merge with anyone + // so link ourselves in at the head of the completed + // transaction list. + if (tr->journal_start != 0) { + // put this entry into the correct sorted place + // in the list instead of just at the head. 
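// (Keeping completed_trs ordered by journal_start appears intended to
// keep adjacent journal ranges next to each other, so the coalescing
// pass above can merge them and advance jnl->active_start.)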
+ // + + prev = NULL; + for (ctr = jnl->completed_trs; ctr && tr->journal_start > ctr->journal_start; prev=ctr, ctr=ctr->next) { + // just keep looping + } + + if (ctr == NULL && prev == NULL) { + jnl->completed_trs = tr; + tr->next = NULL; + } else if (ctr == jnl->completed_trs) { + tr->next = jnl->completed_trs; + jnl->completed_trs = tr; + } else { + tr->next = prev->next; + prev->next = tr; + } + } else { + // if we're here this tr got merged with someone else so + // put it on the list to be free'd + tr->next = jnl->tr_freeme; + jnl->tr_freeme = tr; + } +transaction_done: + unlock_oldstart(jnl); + + unlock_condition(jnl, &jnl->asyncIO); +} + + +#include + +#define SWAP16(x) OSSwapInt16(x) +#define SWAP32(x) OSSwapInt32(x) +#define SWAP64(x) OSSwapInt64(x) + + +static void +swap_journal_header(journal *jnl) +{ + jnl->jhdr->magic = SWAP32(jnl->jhdr->magic); + jnl->jhdr->endian = SWAP32(jnl->jhdr->endian); + jnl->jhdr->start = SWAP64(jnl->jhdr->start); + jnl->jhdr->end = SWAP64(jnl->jhdr->end); + jnl->jhdr->size = SWAP64(jnl->jhdr->size); + jnl->jhdr->blhdr_size = SWAP32(jnl->jhdr->blhdr_size); + jnl->jhdr->checksum = SWAP32(jnl->jhdr->checksum); + jnl->jhdr->jhdr_size = SWAP32(jnl->jhdr->jhdr_size); + jnl->jhdr->sequence_num = SWAP32(jnl->jhdr->sequence_num); +} + +static void +swap_block_list_header(journal *jnl, block_list_header *blhdr) +{ + int i; + + blhdr->max_blocks = SWAP16(blhdr->max_blocks); + blhdr->num_blocks = SWAP16(blhdr->num_blocks); + blhdr->bytes_used = SWAP32(blhdr->bytes_used); + blhdr->checksum = SWAP32(blhdr->checksum); + blhdr->flags = SWAP32(blhdr->flags); + + if (blhdr->num_blocks >= ((jnl->jhdr->blhdr_size / sizeof(block_info)) - 1)) { + printf("jnl: %s: blhdr num blocks looks suspicious (%d / blhdr size %d). not swapping.\n", jnl->jdev_name, blhdr->num_blocks, jnl->jhdr->blhdr_size); + return; + } + + for(i = 0; i < blhdr->num_blocks; i++) { + blhdr->binfo[i].bnum = SWAP64(blhdr->binfo[i].bnum); + blhdr->binfo[i].u.bi.bsize = SWAP32(blhdr->binfo[i].u.bi.bsize); + blhdr->binfo[i].u.bi.b.cksum = SWAP32(blhdr->binfo[i].u.bi.b.cksum); + } +} + + +static int +update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize) +{ + int ret; + struct buf *oblock_bp=NULL; + boolean_t was_vm_privileged = FALSE; + + + // first read the block we want. + ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); + if (ret != 0) { + printf("jnl: %s: update_fs_block: error reading fs block # %lld! (ret %d)\n", jnl->jdev_name, fs_block, ret); + + if (oblock_bp) { + buf_brelse(oblock_bp); + oblock_bp = NULL; + } + + // let's try to be aggressive here and just re-write the block + oblock_bp = buf_getblk(jnl->fsdev, (daddr64_t)fs_block, bsize, 0, 0, BLK_META); + if (oblock_bp == NULL) { + printf("jnl: %s: update_fs_block: buf_getblk() for %lld failed! failing update.\n", jnl->jdev_name, fs_block); + return -1; + } + } + + // make sure it's the correct size. + if (buf_size(oblock_bp) != bsize) { + buf_brelse(oblock_bp); + return -1; + } + + // copy the journal data over top of it + memcpy((char *)buf_dataptr(oblock_bp), block_ptr, bsize); + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... 
+ * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + ret = VNOP_BWRITE(oblock_bp); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + if (ret != 0) { + printf("jnl: %s: update_fs_block: failed to update block %lld (ret %d)\n", jnl->jdev_name, fs_block,ret); + return ret; + } + // and now invalidate it so that if someone else wants to read + // it in a different size they'll be able to do it. + ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); + if (oblock_bp) { + buf_markinvalid(oblock_bp); + buf_brelse(oblock_bp); + } + + return 0; +} + +static int +grow_table(struct bucket **buf_ptr, int num_buckets, int new_size) +{ + struct bucket *newBuf; + int current_size = num_buckets, i; + + // return if newsize is less than the current size + if (new_size < num_buckets) { + return current_size; + } + + newBuf = hfs_malloc(new_size*sizeof(struct bucket)); + + // printf("jnl: lookup_bucket: expanded co_buf to %d elems\n", new_size); + + // copy existing elements + bcopy(*buf_ptr, newBuf, num_buckets*sizeof(struct bucket)); + + // initialize the new ones + for(i = num_buckets; i < new_size; i++) { + newBuf[i].block_num = (off_t)-1; + } + + // free the old container + hfs_free(*buf_ptr, num_buckets * sizeof(struct bucket)); + + // reset the buf_ptr + *buf_ptr = newBuf; + + return new_size; +} + +static int +lookup_bucket(struct bucket **buf_ptr, off_t block_num, int num_full) +{ + int lo, hi, index, matches, i; + + if (num_full == 0) { + return 0; // table is empty, so insert at index=0 + } + + lo = 0; + hi = num_full - 1; + index = -1; + + // perform binary search for block_num + do { + int mid = (hi - lo)/2 + lo; + off_t this_num = (*buf_ptr)[mid].block_num; + + if (block_num == this_num) { + index = mid; + break; + } + + if (block_num < this_num) { + hi = mid; + continue; + } + + if (block_num > this_num) { + lo = mid + 1; + continue; + } + } while (lo < hi); + + // check if lo and hi converged on the match + if (block_num == (*buf_ptr)[hi].block_num) { + index = hi; + } + + // if no existing entry found, find index for new one + if (index == -1) { + index = (block_num < (*buf_ptr)[hi].block_num) ? 
hi : hi + 1; + } else { + // make sure that we return the right-most index in the case of multiple matches + matches = 0; + i = index + 1; + while (i < num_full && block_num == (*buf_ptr)[i].block_num) { + matches++; + i++; + } + + index += matches; + } + + return index; +} + +static int +insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr, int overwriting) +{ + if (!overwriting) { + // grow the table if we're out of space - we may index the table + // with *num_full_ptr (lookup_bucket() can return a maximum value == + // *num_full_ptr), so we need to grow when we hit (*num_buckets_ptr - 1) + // to prevent out-of-bounds indexing + if (*num_full_ptr >= (*num_buckets_ptr - 1)) { + int new_size = *num_buckets_ptr * 2; + int grow_size = grow_table(buf_ptr, *num_buckets_ptr, new_size); + + if (grow_size < new_size) { + printf("jnl: %s: add_block: grow_table returned an error!\n", jnl->jdev_name); + return -1; + } + + *num_buckets_ptr = grow_size; //update num_buckets to reflect the new size + } + + // if we're not inserting at the end, we need to bcopy + if (blk_index != *num_full_ptr) { + bcopy( (*buf_ptr)+(blk_index), (*buf_ptr)+(blk_index+1), (*num_full_ptr-blk_index)*sizeof(struct bucket) ); + } + + (*num_full_ptr)++; // increment only if we're not overwriting + } + + // sanity check the values we're about to add + if ((off_t)offset >= jnl->jhdr->size) { + offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size); + } + if (size <= 0) { + panic("jnl: insert_block: bad size in insert_block (%zd)\n", size); + } + + (*buf_ptr)[blk_index].block_num = num; + (*buf_ptr)[blk_index].block_size = (uint32_t)size; + (*buf_ptr)[blk_index].jnl_offset = (uint32_t)offset; + (*buf_ptr)[blk_index].cksum = cksum; + + return blk_index; +} + +static int +do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num, size_t size, __unused size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr) +{ + int num_to_remove, index, i, overwrite, err; + size_t jhdr_size = jnl->jhdr->jhdr_size, new_offset; + off_t overlap, block_start, block_end; + + block_start = block_num*jhdr_size; + block_end = block_start + size; + overwrite = (block_num == (*buf_ptr)[blk_index].block_num && size >= (*buf_ptr)[blk_index].block_size); + + // first, eliminate any overlap with the previous entry + if (blk_index != 0 && !overwrite) { + off_t prev_block_start = (*buf_ptr)[blk_index-1].block_num*jhdr_size; + off_t prev_block_end = prev_block_start + (*buf_ptr)[blk_index-1].block_size; + overlap = prev_block_end - block_start; + if (overlap > 0) { + if (overlap % jhdr_size != 0) { + panic("jnl: do_overlap: overlap with previous entry not a multiple of %zd\n", jhdr_size); + } + + // if the previous entry completely overlaps this one, we need to break it into two pieces. 
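// For example, with a 512-byte jhdr_size: a previous entry covering
// blocks 100-109 that is partially rewritten by a new 512-byte write to
// block 104 is truncated to blocks 100-103, and a second entry for
// blocks 105-109 is inserted with its jnl_offset advanced
// correspondingly, so replay never copies stale bytes over the newer
// write.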
+ if (prev_block_end > block_end) { + off_t new_num = block_end / jhdr_size; + size_t new_size = prev_block_end - block_end; + + new_offset = (*buf_ptr)[blk_index-1].jnl_offset + (block_end - prev_block_start); + + err = insert_block(jnl, buf_ptr, blk_index, new_num, new_size, new_offset, cksum, num_buckets_ptr, num_full_ptr, 0); + if (err < 0) { + panic("jnl: do_overlap: error inserting during pre-overlap\n"); + } + } + + // Regardless, we need to truncate the previous entry to the beginning of the overlap + (*buf_ptr)[blk_index-1].block_size = (uint32_t)(block_start - prev_block_start); + (*buf_ptr)[blk_index-1].cksum = 0; // have to blow it away because there's no way to check it + } + } + + // then, bail out fast if there's no overlap with the entries that follow + if (!overwrite && block_end <= (off_t)((*buf_ptr)[blk_index].block_num*jhdr_size)) { + return 0; // no overlap, no overwrite + } else if (overwrite && (blk_index + 1 >= *num_full_ptr || block_end <= (off_t)((*buf_ptr)[blk_index+1].block_num*jhdr_size))) { + + (*buf_ptr)[blk_index].cksum = cksum; // update this + return 1; // simple overwrite + } + + // Otherwise, find all cases of total and partial overlap. We use the special + // block_num of -2 to designate entries that are completely overlapped and must + // be eliminated. The block_num, size, and jnl_offset of partially overlapped + // entries must be adjusted to keep the array consistent. + index = blk_index; + num_to_remove = 0; + while (index < *num_full_ptr && block_end > (off_t)((*buf_ptr)[index].block_num*jhdr_size)) { + if (block_end >= (off_t)(((*buf_ptr)[index].block_num*jhdr_size + (*buf_ptr)[index].block_size))) { + (*buf_ptr)[index].block_num = -2; // mark this for deletion + num_to_remove++; + } else { + overlap = block_end - (*buf_ptr)[index].block_num*jhdr_size; + if (overlap > 0) { + if (overlap % jhdr_size != 0) { + panic("jnl: do_overlap: overlap of %lld is not multiple of %zd\n", overlap, jhdr_size); + } + + // if we partially overlap this entry, adjust its block number, jnl offset, and size + (*buf_ptr)[index].block_num += (overlap / jhdr_size); // make sure overlap is multiple of jhdr_size, or round up + (*buf_ptr)[index].cksum = 0; + + new_offset = (*buf_ptr)[index].jnl_offset + overlap; // check for wrap-around + if ((off_t)new_offset >= jnl->jhdr->size) { + new_offset = jhdr_size + (new_offset - jnl->jhdr->size); + } + (*buf_ptr)[index].jnl_offset = (uint32_t)new_offset; + + (*buf_ptr)[index].block_size -= overlap; // sanity check for negative value + if ((*buf_ptr)[index].block_size <= 0) { + panic("jnl: do_overlap: after overlap, new block size is invalid (%u)\n", (*buf_ptr)[index].block_size); + // return -1; // if above panic is removed, return -1 for error + } + } + + } + + index++; + } + + // bcopy over any completely overlapped entries, starting at the right (where the above loop broke out) + index--; // start with the last index used within the above loop + while (index >= blk_index) { + if ((*buf_ptr)[index].block_num == -2) { + if (index == *num_full_ptr-1) { + (*buf_ptr)[index].block_num = -1; // it's the last item in the table... 
just mark as free + } else { + bcopy( (*buf_ptr)+(index+1), (*buf_ptr)+(index), (*num_full_ptr - (index + 1)) * sizeof(struct bucket) ); + } + (*num_full_ptr)--; + } + index--; + } + + // eliminate any stale entries at the end of the table + for(i = *num_full_ptr; i < (*num_full_ptr + num_to_remove); i++) { + (*buf_ptr)[i].block_num = -1; + } + + return 0; // if we got this far, we need to insert the entry into the table (rather than overwrite) +} + +// PR-3105942: Coalesce writes to the same block in journal replay +// We coalesce writes by maintaining a dynamic sorted array of physical disk blocks +// to be replayed and the corresponding location in the journal which contains +// the most recent data for those blocks. The array is "played" once the all the +// blocks in the journal have been coalesced. The code for the case of conflicting/ +// overlapping writes to a single block is the most dense. Because coalescing can +// disrupt the existing time-ordering of blocks in the journal playback, care +// is taken to catch any overlaps and keep the array consistent. +static int +add_block(journal *jnl, struct bucket **buf_ptr, off_t block_num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr) +{ + int blk_index, overwriting; + + // on return from lookup_bucket(), blk_index is the index into the table where block_num should be + // inserted (or the index of the elem to overwrite). + blk_index = lookup_bucket( buf_ptr, block_num, *num_full_ptr); + + // check if the index is within bounds (if we're adding this block to the end of + // the table, blk_index will be equal to num_full) + if (blk_index < 0 || blk_index > *num_full_ptr) { + //printf("jnl: add_block: trouble adding block to co_buf\n"); + return -1; + } // else printf("jnl: add_block: adding block 0x%llx at i=%d\n", block_num, blk_index); + + // Determine whether we're overwriting an existing entry by checking for overlap + overwriting = do_overlap(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr); + if (overwriting < 0) { + return -1; // if we got an error, pass it along + } + + // returns the index, or -1 on error + blk_index = insert_block(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr, overwriting); + + return blk_index; +} + +static int +replay_journal(journal *jnl) +{ + int i, bad_blocks=0; + unsigned int orig_checksum, checksum, check_block_checksums = 0; + size_t ret; + size_t max_bsize = 0; /* protected by block_ptr */ + block_list_header *blhdr; + off_t offset, txn_start_offset=0, blhdr_offset, orig_jnl_start; + char *buff, *block_ptr=NULL; + struct bucket *co_buf; + int num_buckets = STARTING_BUCKETS, num_full, check_past_jnl_end = 1, in_uncharted_territory=0; + uint32_t last_sequence_num = 0; + int replay_retry_count = 0; + + // wrap the start ptr if it points to the very end of the journal + if (jnl->jhdr->start == jnl->jhdr->size) { + jnl->jhdr->start = jnl->jhdr->jhdr_size; + } + if (jnl->jhdr->end == jnl->jhdr->size) { + jnl->jhdr->end = jnl->jhdr->jhdr_size; + } + + if (jnl->jhdr->start == jnl->jhdr->end) { + return 0; + } + + orig_jnl_start = jnl->jhdr->start; + + // allocate memory for the header_block. 
we'll read each blhdr into this + buff = hfs_malloc(jnl->jhdr->blhdr_size); + + // allocate memory for the coalesce buffer + co_buf = hfs_malloc(num_buckets*sizeof(struct bucket)); + +restart_replay: + + // initialize entries + for(i = 0; i < num_buckets; i++) { + co_buf[i].block_num = -1; + } + num_full = 0; // empty at first + + + printf("jnl: %s: replay_journal: from: %lld to: %lld (joffset 0x%llx)\n", + jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end, jnl->jdev_offset); + + while (check_past_jnl_end || jnl->jhdr->start != jnl->jhdr->end) { + offset = blhdr_offset = jnl->jhdr->start; + ret = read_journal_data(jnl, &offset, buff, jnl->jhdr->blhdr_size); + if (ret != (size_t)jnl->jhdr->blhdr_size) { + printf("jnl: %s: replay_journal: Could not read block list header block @ 0x%llx!\n", jnl->jdev_name, offset); + bad_blocks = 1; + goto bad_txn_handling; + } + + blhdr = (block_list_header *)buff; + + orig_checksum = blhdr->checksum; + blhdr->checksum = 0; + if (jnl->flags & JOURNAL_NEED_SWAP) { + // calculate the checksum based on the unswapped data + // because it is done byte-at-a-time. + orig_checksum = (unsigned int)SWAP32(orig_checksum); + checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); + swap_block_list_header(jnl, blhdr); + } else { + checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); + } + + + // + // XXXdbg - if these checks fail, we should replay as much + // we can in the hopes that it will still leave the + // drive in a better state than if we didn't replay + // anything + // + if (checksum != orig_checksum) { + if (check_past_jnl_end && in_uncharted_territory) { + + if (blhdr_offset != jnl->jhdr->end) { + printf("jnl: %s: Extra txn replay stopped @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset); + } + + check_past_jnl_end = 0; + jnl->jhdr->end = blhdr_offset; + continue; + } + + printf("jnl: %s: replay_journal: bad block list header @ 0x%llx (checksum 0x%x != 0x%x)\n", + jnl->jdev_name, blhdr_offset, orig_checksum, checksum); + + if (blhdr_offset == orig_jnl_start) { + // if there's nothing in the journal at all, just bail out altogether. + goto bad_replay; + } + + bad_blocks = 1; + goto bad_txn_handling; + } + + if ( (last_sequence_num != 0) + && (blhdr->binfo[0].u.bi.b.sequence_num != 0) + && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num) + && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num+1)) { + + txn_start_offset = jnl->jhdr->end = blhdr_offset; + + if (check_past_jnl_end) { + check_past_jnl_end = 0; + printf("jnl: %s: 2: extra replay stopped @ %lld / 0x%llx (seq %d < %d)\n", + jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num); + continue; + } + + printf("jnl: %s: txn sequence numbers out of order in txn @ %lld / %llx! 
(%d < %d)\n", + jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num); + bad_blocks = 1; + goto bad_txn_handling; + } + last_sequence_num = blhdr->binfo[0].u.bi.b.sequence_num; + + if (blhdr_offset >= jnl->jhdr->end && jnl->jhdr->start <= jnl->jhdr->end) { + if (last_sequence_num == 0) { + check_past_jnl_end = 0; + printf("jnl: %s: pre-sequence-num-enabled txn's - can not go further than end (%lld %lld).\n", + jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end); + if (jnl->jhdr->start != jnl->jhdr->end) { + jnl->jhdr->start = jnl->jhdr->end; + } + continue; + } + printf("jnl: %s: examining extra transactions starting @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset); + } + + if ( blhdr->max_blocks <= 0 || blhdr->max_blocks > (jnl->jhdr->size/jnl->jhdr->jhdr_size) + || blhdr->num_blocks <= 0 || blhdr->num_blocks > blhdr->max_blocks) { + printf("jnl: %s: replay_journal: bad looking journal entry: max: %d num: %d\n", + jnl->jdev_name, blhdr->max_blocks, blhdr->num_blocks); + bad_blocks = 1; + goto bad_txn_handling; + } + + max_bsize = 0; + for (i = 1; i < blhdr->num_blocks; i++) { + if (blhdr->binfo[i].bnum < 0 && blhdr->binfo[i].bnum != (off_t)-1) { + printf("jnl: %s: replay_journal: bogus block number 0x%llx\n", jnl->jdev_name, blhdr->binfo[i].bnum); + bad_blocks = 1; + goto bad_txn_handling; + } + + if ((size_t)blhdr->binfo[i].u.bi.bsize > max_bsize) { + max_bsize = blhdr->binfo[i].u.bi.bsize; + } + } + + if (blhdr->flags & BLHDR_CHECK_CHECKSUMS) { + check_block_checksums = 1; + block_ptr = hfs_malloc(max_bsize); + } else { + block_ptr = NULL; + } + + if (blhdr->flags & BLHDR_FIRST_HEADER) { + txn_start_offset = blhdr_offset; + } + + //printf("jnl: replay_journal: adding %d blocks in journal entry @ 0x%llx to co_buf\n", + // blhdr->num_blocks-1, jnl->jhdr->start); + bad_blocks = 0; + for (i = 1; i < blhdr->num_blocks; i++) { + int size, ret_val; + off_t number; + + size = blhdr->binfo[i].u.bi.bsize; + number = blhdr->binfo[i].bnum; + + // don't add "killed" blocks + if (number == (off_t)-1) { + //printf("jnl: replay_journal: skipping killed fs block (index %d)\n", i); + } else { + + if (check_block_checksums) { + int32_t disk_cksum; + off_t block_offset; + + block_offset = offset; + + // read the block so we can check the checksum + ret = read_journal_data(jnl, &block_offset, block_ptr, size); + if (ret != (size_t)size) { + printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, offset); + bad_blocks = 1; + goto bad_txn_handling; + } + + disk_cksum = calc_checksum(block_ptr, size); + + // there is no need to swap the checksum from disk because + // it got swapped when the blhdr was read in. 
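+				// Note that a stored per-block checksum of 0 is treated as
+				// "no checksum recorded", so only blocks with a non-zero
+				// blhdr checksum are verified below.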
+ if (blhdr->binfo[i].u.bi.b.cksum != 0 && disk_cksum != blhdr->binfo[i].u.bi.b.cksum) { + printf("jnl: %s: txn starting at %lld (%lld) @ index %3d bnum %lld (%d) with disk cksum != blhdr cksum (0x%.8x 0x%.8x)\n", + jnl->jdev_name, txn_start_offset, blhdr_offset, i, number, size, disk_cksum, blhdr->binfo[i].u.bi.b.cksum); + printf("jnl: 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", + *(int *)&block_ptr[0*sizeof(int)], *(int *)&block_ptr[1*sizeof(int)], *(int *)&block_ptr[2*sizeof(int)], *(int *)&block_ptr[3*sizeof(int)], + *(int *)&block_ptr[4*sizeof(int)], *(int *)&block_ptr[5*sizeof(int)], *(int *)&block_ptr[6*sizeof(int)], *(int *)&block_ptr[7*sizeof(int)]); + + bad_blocks = 1; + goto bad_txn_handling; + } + } + + + // add this bucket to co_buf, coalescing where possible + // printf("jnl: replay_journal: adding block 0x%llx\n", number); + ret_val = add_block(jnl, &co_buf, number, size, (size_t) offset, blhdr->binfo[i].u.bi.b.cksum, &num_buckets, &num_full); + + if (ret_val == -1) { + printf("jnl: %s: replay_journal: trouble adding block to co_buf\n", jnl->jdev_name); + goto bad_replay; + } // else printf("jnl: replay_journal: added block 0x%llx at i=%d\n", number); + } + + // increment offset + offset += size; + + // check if the last block added puts us off the end of the jnl. + // if so, we need to wrap to the beginning and take any remainder + // into account + // + if (offset >= jnl->jhdr->size) { + offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size); + } + } + + if (block_ptr) { + hfs_free(block_ptr, max_bsize); + block_ptr = NULL; + } + + if (bad_blocks) { + bad_txn_handling: + /* Journal replay got error before it found any valid + * transations, abort replay */ + if (txn_start_offset == 0) { + printf("jnl: %s: no known good txn start offset! aborting journal replay.\n", jnl->jdev_name); + goto bad_replay; + } + + /* Repeated error during journal replay, abort replay */ + if (replay_retry_count == 3) { + printf("jnl: %s: repeated errors replaying journal! aborting journal replay.\n", jnl->jdev_name); + goto bad_replay; + } + replay_retry_count++; + + /* There was an error replaying the journal (possibly + * EIO/ENXIO from the device). So retry replaying all + * the good transactions that we found before getting + * the error. + */ + jnl->jhdr->start = orig_jnl_start; + jnl->jhdr->end = txn_start_offset; + check_past_jnl_end = 0; + last_sequence_num = 0; + printf("jnl: %s: restarting journal replay (%lld - %lld)!\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end); + goto restart_replay; + } + + jnl->jhdr->start += blhdr->bytes_used; + if (jnl->jhdr->start >= jnl->jhdr->size) { + // wrap around and skip the journal header block + jnl->jhdr->start = (jnl->jhdr->start % jnl->jhdr->size) + jnl->jhdr->jhdr_size; + } + + if (jnl->jhdr->start == jnl->jhdr->end) { + in_uncharted_territory = 1; + } + } + + if (jnl->jhdr->start != jnl->jhdr->end) { + printf("jnl: %s: start %lld != end %lld. 
resetting end.\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end); + jnl->jhdr->end = jnl->jhdr->start; + } + + //printf("jnl: replay_journal: replaying %d blocks\n", num_full); + + /* + * make sure it's at least one page in size, so + * start max_bsize at PAGE_SIZE + */ + for (i = 0, max_bsize = PAGE_SIZE; i < num_full; i++) { + + if (co_buf[i].block_num == (off_t)-1) + continue; + + if (co_buf[i].block_size > max_bsize) + max_bsize = co_buf[i].block_size; + } + /* + * round max_bsize up to the nearest PAGE_SIZE multiple + */ + if (max_bsize & (PAGE_SIZE - 1)) { + max_bsize = (max_bsize + PAGE_SIZE) & ~(PAGE_SIZE - 1); + } + + block_ptr = hfs_malloc(max_bsize); + + // Replay the coalesced entries in the co-buf + for(i = 0; i < num_full; i++) { + size_t size = co_buf[i].block_size; + off_t jnl_offset = (off_t) co_buf[i].jnl_offset; + off_t number = co_buf[i].block_num; + + + // printf("replaying co_buf[%d]: block 0x%llx, size 0x%x, jnl_offset 0x%llx\n", i, co_buf[i].block_num, + // co_buf[i].block_size, co_buf[i].jnl_offset); + + if (number == (off_t)-1) { + // printf("jnl: replay_journal: skipping killed fs block\n"); + } else { + + // do journal read, and set the phys. block + ret = read_journal_data(jnl, &jnl_offset, block_ptr, size); + if (ret != size) { + printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, jnl_offset); + goto bad_replay; + } + + if (update_fs_block(jnl, block_ptr, number, size) != 0) { + goto bad_replay; + } + } + } + + + // done replaying; update jnl header + if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num) != 0) { + goto bad_replay; + } + + printf("jnl: %s: journal replay done.\n", jnl->jdev_name); + + // free block_ptr + if (block_ptr) { + hfs_free(block_ptr, max_bsize); + block_ptr = NULL; + } + + // free the coalesce buffer + hfs_free(co_buf, num_buckets*sizeof(struct bucket)); + co_buf = NULL; + + hfs_free(buff, jnl->jhdr->blhdr_size); + return 0; + +bad_replay: + hfs_free(block_ptr, max_bsize); + hfs_free(co_buf, num_buckets*sizeof(struct bucket)); + hfs_free(buff, jnl->jhdr->blhdr_size); + + return -1; +} + + +#define DEFAULT_TRANSACTION_BUFFER_SIZE (128*1024) +#define MAX_TRANSACTION_BUFFER_SIZE (3072*1024) + +// XXXdbg - so I can change it in the debugger +int def_tbuffer_size = 0; + + +// +// This function sets the size of the tbuffer and the +// size of the blhdr. It assumes that jnl->jhdr->size +// and jnl->jhdr->jhdr_size are already valid. +// +static void +size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz) +{ + // + // one-time initialization based on how much memory + // there is in the machine. + // + if (def_tbuffer_size == 0) { + uint64_t memsize = 0; + size_t l = sizeof(memsize); + sysctlbyname("hw.memsize", &memsize, &l, NULL, 0); + + if (memsize < (256*1024*1024)) { + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE; + } else if (memsize < (512*1024*1024)) { + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 2; + } else if (memsize < (1024*1024*1024)) { + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 3; + } else { + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * (memsize / (256*1024*1024)); + } + } + + // For analyzer + hfs_assert(jnl->jhdr->jhdr_size > 0); + + // size up the transaction buffer... can't be larger than the number + // of blocks that can fit in a block_list_header block. 
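+	// Worked example (the numbers are assumptions for illustration only):
+	// with jhdr_size = 512 and the 128K default tbuffer, the buffer holds
+	// 128K/512 = 256 journal blocks, so the block list header needs
+	// 256 * sizeof(block_info) (16 bytes each) = 4096 bytes, which already
+	// satisfies the phys_blksz minimum and multiple-of checks at the end
+	// of this function.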
+ if (tbuffer_size == 0) { + jnl->tbuffer_size = def_tbuffer_size; + } else { + // make sure that the specified tbuffer_size isn't too small + if (tbuffer_size < jnl->jhdr->blhdr_size * 2) { + tbuffer_size = jnl->jhdr->blhdr_size * 2; + } + // and make sure it's an even multiple of the block size + if ((tbuffer_size % jnl->jhdr->jhdr_size) != 0) { + tbuffer_size -= (tbuffer_size % jnl->jhdr->jhdr_size); + } + + jnl->tbuffer_size = tbuffer_size; + } + + if (jnl->tbuffer_size > (jnl->jhdr->size / 2)) { + jnl->tbuffer_size = (jnl->jhdr->size / 2); + } + + if (jnl->tbuffer_size > MAX_TRANSACTION_BUFFER_SIZE) { + jnl->tbuffer_size = MAX_TRANSACTION_BUFFER_SIZE; + } + + jnl->jhdr->blhdr_size = (jnl->tbuffer_size / jnl->jhdr->jhdr_size) * sizeof(block_info); + if (jnl->jhdr->blhdr_size < phys_blksz) { + jnl->jhdr->blhdr_size = phys_blksz; + } else if ((jnl->jhdr->blhdr_size % phys_blksz) != 0) { + // have to round up so we're an even multiple of the physical block size + jnl->jhdr->blhdr_size = (jnl->jhdr->blhdr_size + (phys_blksz - 1)) & ~(phys_blksz - 1); + } +} + +static void +get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_context *context) +{ + off_t readblockcnt; + off_t writeblockcnt; + off_t readmaxcnt=0, tmp_readmaxcnt; + off_t writemaxcnt=0, tmp_writemaxcnt; + off_t readsegcnt, writesegcnt; + int32_t features; + + if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, context) == 0) { + if (features & DK_FEATURE_FORCE_UNIT_ACCESS) { + const char *name = vnode_getname_printable(devvp); + jnl->flags |= JOURNAL_DO_FUA_WRITES; + printf("jnl: %s: enabling FUA writes (features 0x%x)\n", name, features); + vnode_putname_printable(name); + } + if (features & DK_FEATURE_UNMAP) { + jnl->flags |= JOURNAL_USE_UNMAP; + } + + if (features & DK_FEATURE_BARRIER) { + jnl->flags |= JOURNAL_FEATURE_BARRIER; + } + } + + // + // First check the max read size via several different mechanisms... + // + VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, (caddr_t)&readmaxcnt, 0, context); + + if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t)&readblockcnt, 0, context) == 0) { + tmp_readmaxcnt = readblockcnt * phys_blksz; + if (readmaxcnt == 0 || (readblockcnt > 0 && tmp_readmaxcnt < readmaxcnt)) { + readmaxcnt = tmp_readmaxcnt; + } + } + + if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t)&readsegcnt, 0, context)) { + readsegcnt = 0; + } + + if (readsegcnt > 0 && (readsegcnt * PAGE_SIZE) < readmaxcnt) { + readmaxcnt = readsegcnt * PAGE_SIZE; + } + + if (readmaxcnt == 0) { + readmaxcnt = 128 * 1024; + } else if (readmaxcnt > UINT32_MAX) { + readmaxcnt = UINT32_MAX; + } + + + // + // Now check the max writes size via several different mechanisms... 
+ // + VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t)&writemaxcnt, 0, context); + + if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t)&writeblockcnt, 0, context) == 0) { + tmp_writemaxcnt = writeblockcnt * phys_blksz; + if (writemaxcnt == 0 || (writeblockcnt > 0 && tmp_writemaxcnt < writemaxcnt)) { + writemaxcnt = tmp_writemaxcnt; + } + } + + if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t)&writesegcnt, 0, context)) { + writesegcnt = 0; + } + + if (writesegcnt > 0 && (writesegcnt * PAGE_SIZE) < writemaxcnt) { + writemaxcnt = writesegcnt * PAGE_SIZE; + } + + if (writemaxcnt == 0) { + writemaxcnt = 128 * 1024; + } else if (writemaxcnt > UINT32_MAX) { + writemaxcnt = UINT32_MAX; + } + + jnl->max_read_size = readmaxcnt; + jnl->max_write_size = writemaxcnt; + // printf("jnl: %s: max read/write: %lld k / %lld k\n", + // jnl->jdev_name ? jnl->jdev_name : "unknown", + // jnl->max_read_size/1024, jnl->max_write_size/1024); +} + + +journal * +journal_create(struct vnode *jvp, + off_t offset, + off_t journal_size, + struct vnode *fsvp, + size_t min_fs_blksz, + int32_t flags, + int32_t tbuffer_size, + void (*flush)(void *arg), + void *arg, + struct mount *fsmount) +{ + journal *jnl; + uint32_t phys_blksz, new_txn_base; + u_int32_t min_size; + const char *jdev_name; + /* + * Cap the journal max size to 2GB. On HFS, it will attempt to occupy + * a full allocation block if the current size is smaller than the allocation + * block on which it resides. Once we hit the exabyte filesystem range, then + * it will use 2GB allocation blocks. As a result, make the cap 2GB. + */ + + jdev_name = vnode_getname_printable(jvp); + + /* Get the real physical block size. */ + if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) { + goto cleanup_jdev_name; + } + + if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) { + printf("jnl: %s: create: journal size %lld looks bogus.\n", jdev_name, journal_size); + goto cleanup_jdev_name; + } + + min_size = phys_blksz * (phys_blksz / sizeof(block_info)); + /* Reject journals that are too small given the sector size of the device */ + if (journal_size < min_size) { + printf("jnl: %s: create: journal size (%lld) too small given sector size of (%u)\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; + } + + if (phys_blksz > min_fs_blksz) { + printf("jnl: %s: create: error: phys blksize %u bigger than min fs blksize %zd\n", + jdev_name, phys_blksz, min_fs_blksz); + goto cleanup_jdev_name; + } + + if ((journal_size % phys_blksz) != 0) { + printf("jnl: %s: create: journal size 0x%llx is not an even multiple of block size 0x%ux\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; + } + + + jnl = hfs_mallocz(sizeof(struct journal)); + + jnl->jdev = jvp; + jnl->jdev_offset = offset; + jnl->fsdev = fsvp; + jnl->flush = flush; + jnl->flush_arg = arg; + jnl->flags = (flags & JOURNAL_OPTION_FLAGS_MASK); + jnl->jdev_name = jdev_name; + lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); + + // Keep a point to the mount around for use in IO throttling. 
+ jnl->fsmount = fsmount; + + get_io_info(jvp, phys_blksz, jnl, vfs_context_kernel()); + + jnl->header_buf = hfs_malloc(phys_blksz); + jnl->header_buf_size = phys_blksz; + + jnl->jhdr = (journal_header *)jnl->header_buf; + memset(jnl->jhdr, 0, sizeof(journal_header)); + + // we have to set this up here so that do_journal_io() will work + jnl->jhdr->jhdr_size = phys_blksz; + + // + // We try and read the journal header to see if there is already one + // out there. If there is, it's possible that it has transactions + // in it that we might replay if we happen to pick a sequence number + // that is a little less than the old one, there is a crash and the + // last txn written ends right at the start of a txn from the previous + // incarnation of this file system. If all that happens we would + // replay the transactions from the old file system and that would + // destroy your disk. Although it is extremely unlikely for all those + // conditions to happen, the probability is non-zero and the result is + // severe - you lose your file system. Therefore if we find a valid + // journal header and the sequence number is non-zero we write junk + // over the entire journal so that there is no way we will encounter + // any old transactions. This is slow but should be a rare event + // since most tools erase the journal. + // + if ( read_journal_header(jnl, jnl->jhdr, phys_blksz) == phys_blksz + && jnl->jhdr->magic == JOURNAL_HEADER_MAGIC + && jnl->jhdr->sequence_num != 0) { + + new_txn_base = (jnl->jhdr->sequence_num + (journal_size / phys_blksz) + (random() % 16384)) & 0x00ffffff; + printf("jnl: %s: create: avoiding old sequence number 0x%x (0x%x)\n", jdev_name, jnl->jhdr->sequence_num, new_txn_base); + +#if 0 + int i; + off_t pos=0; + + for(i = 1; i < journal_size / phys_blksz; i++) { + pos = i*phys_blksz; + + // we don't really care what data we write just so long + // as it's not a valid transaction header. since we have + // the header_buf sitting around we'll use that. 
+ write_journal_data(jnl, &pos, jnl->header_buf, phys_blksz); + } + printf("jnl: create: done clearing journal (i=%d)\n", i); +#endif + } else { + new_txn_base = random() & 0x00ffffff; + } + + memset(jnl->header_buf, 0, phys_blksz); + + jnl->jhdr->magic = JOURNAL_HEADER_MAGIC; + jnl->jhdr->endian = ENDIAN_MAGIC; + jnl->jhdr->start = phys_blksz; // start at block #1, block #0 is for the jhdr itself + jnl->jhdr->end = phys_blksz; + jnl->jhdr->size = journal_size; + jnl->jhdr->jhdr_size = phys_blksz; + size_up_tbuffer(jnl, tbuffer_size, phys_blksz); + + jnl->active_start = jnl->jhdr->start; + + // XXXdbg - for testing you can force the journal to wrap around + // jnl->jhdr->start = jnl->jhdr->size - (phys_blksz*3); + // jnl->jhdr->end = jnl->jhdr->size - (phys_blksz*3); + + jnl->jhdr->sequence_num = new_txn_base; + + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); + lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr); + lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr); + + + jnl->flushing = FALSE; + jnl->asyncIO = FALSE; + jnl->flush_aborted = FALSE; + jnl->writing_header = FALSE; + jnl->async_trim = NULL; + jnl->sequence_num = jnl->jhdr->sequence_num; + + if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num) != 0) { + printf("jnl: %s: journal_create: failed to write journal header.\n", jdev_name); + goto bad_write; + } + + goto journal_create_complete; + + +bad_write: + hfs_free(jnl->header_buf, phys_blksz); + jnl->jhdr = NULL; + hfs_free(jnl, sizeof(*jnl)); +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + jnl = NULL; +journal_create_complete: + return jnl; +} + + +journal * +journal_open(struct vnode *jvp, + off_t offset, + off_t journal_size, + struct vnode *fsvp, + size_t min_fs_blksz, + int32_t flags, + int32_t tbuffer_size, + void (*flush)(void *arg), + void *arg, + struct mount *fsmount) +{ + journal *jnl; + uint32_t orig_blksz=0; + uint32_t phys_blksz; + u_int32_t min_size = 0; + int orig_checksum, checksum; + const char *jdev_name = vnode_getname_printable(jvp); + + /* Get the real physical block size. */ + if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) { + goto cleanup_jdev_name; + } + + if (phys_blksz > min_fs_blksz) { + printf("jnl: %s: open: error: phys blksize %u bigger than min fs blksize %zd\n", + jdev_name, phys_blksz, min_fs_blksz); + goto cleanup_jdev_name; + } + + if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) { + printf("jnl: %s: open: journal size %lld looks bogus.\n", jdev_name, journal_size); + goto cleanup_jdev_name; + } + + min_size = phys_blksz * (phys_blksz / sizeof(block_info)); + /* Reject journals that are too small given the sector size of the device */ + if (journal_size < min_size) { + printf("jnl: %s: open: journal size (%lld) too small given sector size of (%u)\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; + } + + if ((journal_size % phys_blksz) != 0) { + printf("jnl: %s: open: journal size 0x%llx is not an even multiple of block size 0x%x\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; + } + + jnl = hfs_mallocz(sizeof(struct journal)); + + jnl->jdev = jvp; + jnl->jdev_offset = offset; + jnl->fsdev = fsvp; + jnl->flush = flush; + jnl->flush_arg = arg; + jnl->flags = (flags & JOURNAL_OPTION_FLAGS_MASK); + jnl->jdev_name = jdev_name; + lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); + + /* We hold the mount to later pass to the throttling code for IO + * accounting. 
+ */ + jnl->fsmount = fsmount; + + get_io_info(jvp, phys_blksz, jnl, vfs_context_kernel()); + + jnl->header_buf = hfs_malloc(phys_blksz); + jnl->header_buf_size = phys_blksz; + + jnl->jhdr = (journal_header *)jnl->header_buf; + memset(jnl->jhdr, 0, sizeof(journal_header)); + + // we have to set this up here so that do_journal_io() will work + jnl->jhdr->jhdr_size = phys_blksz; + + if (read_journal_header(jnl, jnl->jhdr, phys_blksz) != phys_blksz) { + printf("jnl: %s: open: could not read %u bytes for the journal header.\n", + jdev_name, phys_blksz); + goto bad_journal; + } + + /* + * Check for a bad jhdr size after reading in the journal header. + * The journal header length cannot be zero + */ + if (jnl->jhdr->jhdr_size == 0) { + printf("jnl: %s: open: bad jhdr size (%d) \n", jdev_name, jnl->jhdr->jhdr_size); + goto bad_journal; + } + + orig_checksum = jnl->jhdr->checksum; + jnl->jhdr->checksum = 0; + + if (jnl->jhdr->magic == SWAP32(JOURNAL_HEADER_MAGIC)) { + // do this before the swap since it's done byte-at-a-time + orig_checksum = SWAP32(orig_checksum); + checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE); + swap_journal_header(jnl); + jnl->flags |= JOURNAL_NEED_SWAP; + } else { + checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE); + } + + if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC && jnl->jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) { + printf("jnl: %s: open: journal magic is bad (0x%x != 0x%x)\n", + jnl->jdev_name, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC); + goto bad_journal; + } + + // only check if we're the current journal header magic value + if (jnl->jhdr->magic == JOURNAL_HEADER_MAGIC) { + + if (orig_checksum != checksum) { + printf("jnl: %s: open: journal checksum is bad (0x%x != 0x%x)\n", + jdev_name, orig_checksum, checksum); + + //goto bad_journal; + } + } + + // XXXdbg - convert old style magic numbers to the new one + if (jnl->jhdr->magic == OLD_JOURNAL_HEADER_MAGIC) { + jnl->jhdr->magic = JOURNAL_HEADER_MAGIC; + } + + if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) { + /* + * The volume has probably been resized (such that we had to adjust the + * logical sector size), or copied to media with a different logical + * sector size. + * + * Temporarily change the device's logical block size to match the + * journal's header size. This will allow us to replay the journal + * safely. If the replay succeeds, we will update the journal's header + * size (later in this function). 
+ */ + orig_blksz = phys_blksz; + phys_blksz = jnl->jhdr->jhdr_size; + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, vfs_context_kernel()); + printf("jnl: %s: open: temporarily switched block size from %u to %u\n", + jdev_name, orig_blksz, phys_blksz); + } + + if ( jnl->jhdr->start <= 0 + || jnl->jhdr->start > jnl->jhdr->size + || jnl->jhdr->start > 1024*1024*1024) { + printf("jnl: %s: open: jhdr start looks bad (0x%llx max size 0x%llx)\n", + jdev_name, jnl->jhdr->start, jnl->jhdr->size); + goto bad_journal; + } + + if ( jnl->jhdr->end <= 0 + || jnl->jhdr->end > jnl->jhdr->size + || jnl->jhdr->end > 1024*1024*1024) { + printf("jnl: %s: open: jhdr end looks bad (0x%llx max size 0x%llx)\n", + jdev_name, jnl->jhdr->end, jnl->jhdr->size); + goto bad_journal; + } + + if (jnl->jhdr->size < (256*1024) || jnl->jhdr->size > 1024*1024*1024) { + printf("jnl: %s: open: jhdr size looks bad (0x%llx)\n", jdev_name, jnl->jhdr->size); + goto bad_journal; + } + +// XXXdbg - can't do these checks because hfs writes all kinds of +// non-uniform sized blocks even on devices that have a block size +// that is larger than 512 bytes (i.e. optical media w/2k blocks). +// therefore these checks will fail and so we just have to punt and +// do more relaxed checking... +// XXXdbg if ((jnl->jhdr->start % jnl->jhdr->jhdr_size) != 0) { + if ((jnl->jhdr->start % 512) != 0) { + printf("jnl: %s: open: journal start (0x%llx) not a multiple of 512?\n", + jdev_name, jnl->jhdr->start); + goto bad_journal; + } + +//XXXdbg if ((jnl->jhdr->end % jnl->jhdr->jhdr_size) != 0) { + if ((jnl->jhdr->end % 512) != 0) { + printf("jnl: %s: open: journal end (0x%llx) not a multiple of block size (0x%x)?\n", + jdev_name, jnl->jhdr->end, jnl->jhdr->jhdr_size); + goto bad_journal; + } + + if (jnl->jhdr->blhdr_size < 0) { + //throw out invalid sizes + printf("jnl %s: open: blhdr size looks bogus! (%d) \n", + jdev_name, jnl->jhdr->blhdr_size); + goto bad_journal; + } + + // take care of replaying the journal if necessary + if (flags & JOURNAL_RESET) { + printf("jnl: %s: journal start/end pointers reset! (s 0x%llx e 0x%llx)\n", + jdev_name, jnl->jhdr->start, jnl->jhdr->end); + jnl->jhdr->start = jnl->jhdr->end; + } else if (replay_journal(jnl) != 0) { + printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name); + goto bad_journal; + } + + /* + * When we get here, we know that the journal is empty (jnl->jhdr->start == + * jnl->jhdr->end). If the device's logical block size was different from + * the journal's header size, then we can now restore the device's logical + * block size and update the journal's header size to match. + * + * Note that we also adjust the journal's start and end so that they will + * be aligned on the new block size. We pick a new sequence number to + * avoid any problems if a replay found previous transactions using the old + * journal header size. (See the comments in journal_create(), above.) + */ + + if (orig_blksz != 0) { + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, vfs_context_kernel()); + phys_blksz = orig_blksz; + + orig_blksz = 0; + + jnl->jhdr->jhdr_size = phys_blksz; + jnl->jhdr->start = phys_blksz; + jnl->jhdr->end = phys_blksz; + jnl->jhdr->sequence_num = (jnl->jhdr->sequence_num + + (journal_size / phys_blksz) + + (random() % 16384)) & 0x00ffffff; + + if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num)) { + printf("jnl: %s: open: failed to update journal header size\n", jdev_name); + goto bad_journal; + } + } + + // make sure this is in sync! 
+ jnl->active_start = jnl->jhdr->start; + jnl->sequence_num = jnl->jhdr->sequence_num; + + // set this now, after we've replayed the journal + size_up_tbuffer(jnl, tbuffer_size, phys_blksz); + + // TODO: Does this need to change if the device's logical block size changed? + if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) { + printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size, + jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size); + goto bad_journal; + } + + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); + lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr); + lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr); + + goto journal_open_complete; + +bad_journal: + if (orig_blksz != 0) { + phys_blksz = orig_blksz; + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, vfs_context_kernel()); + printf("jnl: %s: open: restored block size after error\n", jdev_name); + } + hfs_free(jnl->header_buf, jnl->header_buf_size); + hfs_free(jnl, sizeof(*jnl)); +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + jnl = NULL; +journal_open_complete: + return jnl; +} + + +int +journal_is_clean(struct vnode *jvp, + off_t offset, + off_t journal_size, + struct vnode *fsvp, + size_t min_fs_block_size) +{ + journal jnl; + uint32_t phys_blksz; + int ret; + int orig_checksum, checksum; + const char *jdev_name = vnode_getname_printable(jvp); + + /* Get the real physical block size. */ + if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) { + printf("jnl: %s: is_clean: failed to get device block size.\n", jdev_name); + ret = EINVAL; + goto cleanup_jdev_name; + } + + if (phys_blksz > (uint32_t)min_fs_block_size) { + printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %zd\n", + jdev_name, phys_blksz, min_fs_block_size); + ret = EINVAL; + goto cleanup_jdev_name; + } + + if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) { + printf("jnl: %s: is_clean: journal size %lld looks bogus.\n", jdev_name, journal_size); + ret = EINVAL; + goto cleanup_jdev_name; + } + + if ((journal_size % phys_blksz) != 0) { + printf("jnl: %s: is_clean: journal size 0x%llx is not an even multiple of block size 0x%x\n", + jdev_name, journal_size, phys_blksz); + ret = EINVAL; + goto cleanup_jdev_name; + } + + memset(&jnl, 0, sizeof(jnl)); + + jnl.header_buf = hfs_malloc(phys_blksz); + jnl.header_buf_size = phys_blksz; + + get_io_info(jvp, phys_blksz, &jnl, vfs_context_kernel()); + + jnl.jhdr = (journal_header *)jnl.header_buf; + memset(jnl.jhdr, 0, sizeof(journal_header)); + + jnl.jdev = jvp; + jnl.jdev_offset = offset; + jnl.fsdev = fsvp; + + // we have to set this up here so that do_journal_io() will work + jnl.jhdr->jhdr_size = phys_blksz; + + if (read_journal_header(&jnl, jnl.jhdr, phys_blksz) != (unsigned)phys_blksz) { + printf("jnl: %s: is_clean: could not read %d bytes for the journal header.\n", + jdev_name, phys_blksz); + ret = EINVAL; + goto get_out; + } + + orig_checksum = jnl.jhdr->checksum; + jnl.jhdr->checksum = 0; + + if (jnl.jhdr->magic == SWAP32(JOURNAL_HEADER_MAGIC)) { + // do this before the swap since it's done byte-at-a-time + orig_checksum = SWAP32(orig_checksum); + checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE); + swap_journal_header(&jnl); + jnl.flags |= JOURNAL_NEED_SWAP; + } else { + checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE); + } + + if 
(jnl.jhdr->magic != JOURNAL_HEADER_MAGIC && jnl.jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) { + printf("jnl: %s: is_clean: journal magic is bad (0x%x != 0x%x)\n", + jdev_name, jnl.jhdr->magic, JOURNAL_HEADER_MAGIC); + ret = EINVAL; + goto get_out; + } + + if (orig_checksum != checksum) { + printf("jnl: %s: is_clean: journal checksum is bad (0x%x != 0x%x)\n", jdev_name, orig_checksum, checksum); + ret = EINVAL; + goto get_out; + } + + // + // if the start and end are equal then the journal is clean. + // otherwise it's not clean and therefore an error. + // + if (jnl.jhdr->start == jnl.jhdr->end) { + ret = 0; + } else { + ret = EBUSY; // so the caller can differentiate an invalid journal from a "busy" one + } + +get_out: + hfs_free(jnl.header_buf, jnl.header_buf_size); +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + return ret; +} + + +void +journal_close(journal *jnl) +{ + volatile off_t *start, *end; + int counter=0; + + CHECK_JOURNAL(jnl); + + // set this before doing anything that would block so that + // we start tearing things down properly. + // + jnl->flags |= JOURNAL_CLOSE_PENDING; + + if (jnl->owner != current_thread()) { + journal_lock(jnl); + } + + wait_condition(jnl, &jnl->flushing, "journal_close"); + + // + // only write stuff to disk if the journal is still valid + // + if ((jnl->flags & JOURNAL_INVALID) == 0) { + + if (jnl->active_tr) { + /* + * "journal_end_transaction" will fire the flush asynchronously + */ + journal_end_transaction(jnl); + } + + // flush any buffered transactions + if (jnl->cur_tr) { + transaction *tr = jnl->cur_tr; + + jnl->cur_tr = NULL; + /* + * "end_transaction" will wait for any in-progress flush to complete + * before flushing "cur_tr" synchronously("must_wait" == TRUE) + */ + end_transaction(tr, 1, NULL, NULL, FALSE, TRUE); + } + /* + * if there was an "active_tr", make sure we wait for + * it to flush if there was no "cur_tr" to process + */ + wait_condition(jnl, &jnl->flushing, "journal_close"); + + //start = &jnl->jhdr->start; + start = &jnl->active_start; + end = &jnl->jhdr->end; + + while (*start != *end && counter++ < 5000) { + //printf("jnl: close: flushing the buffer cache (start 0x%llx end 0x%llx)\n", *start, *end); + if (jnl->flush) { + jnl->flush(jnl->flush_arg); + } + tsleep((caddr_t)jnl, PRIBIO, "jnl_close", 2); + } + + if (*start != *end) { + printf("jnl: %s: close: buffer flushing didn't seem to flush out all the transactions! (0x%llx - 0x%llx)\n", + jnl->jdev_name, *start, *end); + } + + // make sure this is in sync when we close the journal + jnl->jhdr->start = jnl->active_start; + + // if this fails there's not much we can do at this point... + write_journal_header(jnl, 1, jnl->sequence_num); + } else { + // if we're here the journal isn't valid any more. + // so make sure we don't leave any locked blocks lying around + printf("jnl: %s: close: journal is invalid. 
aborting outstanding transactions\n", jnl->jdev_name); + if (jnl->active_tr || jnl->cur_tr) { + transaction *tr; + + if (jnl->active_tr) { + tr = jnl->active_tr; + jnl->active_tr = NULL; + } else { + tr = jnl->cur_tr; + jnl->cur_tr = NULL; + } + abort_transaction(jnl, tr); + + if (jnl->active_tr || jnl->cur_tr) { + panic("jnl: %s: close: jnl @ %p had both an active and cur tr\n", jnl->jdev_name, jnl); + } + } + } + wait_condition(jnl, &jnl->asyncIO, "journal_close"); + + free_old_stuff(jnl); + + hfs_free(jnl->header_buf, jnl->header_buf_size); + jnl->jhdr = (void *)0xbeefbabe; + + vnode_putname_printable(jnl->jdev_name); + + journal_unlock(jnl); + lck_mtx_destroy(&jnl->old_start_lock, jnl_mutex_group); + lck_mtx_destroy(&jnl->jlock, jnl_mutex_group); + lck_mtx_destroy(&jnl->flock, jnl_mutex_group); + hfs_free(jnl, sizeof(*jnl)); +} + +static void +dump_journal(journal *jnl) +{ + transaction *ctr; + + printf("journal for dev %s:", jnl->jdev_name); + printf(" jdev_offset %.8llx\n", jnl->jdev_offset); + printf(" magic: 0x%.8x\n", jnl->jhdr->magic); + printf(" start: 0x%.8llx\n", jnl->jhdr->start); + printf(" end: 0x%.8llx\n", jnl->jhdr->end); + printf(" size: 0x%.8llx\n", jnl->jhdr->size); + printf(" blhdr size: %d\n", jnl->jhdr->blhdr_size); + printf(" jhdr size: %d\n", jnl->jhdr->jhdr_size); + printf(" chksum: 0x%.8x\n", jnl->jhdr->checksum); + + printf(" completed transactions:\n"); + for (ctr = jnl->completed_trs; ctr; ctr = ctr->next) { + printf(" 0x%.8llx - 0x%.8llx\n", ctr->journal_start, ctr->journal_end); + } +} + + + +static off_t +free_space(journal *jnl) +{ + off_t free_space_offset; + + if (jnl->jhdr->start < jnl->jhdr->end) { + free_space_offset = jnl->jhdr->size - (jnl->jhdr->end - jnl->jhdr->start) - jnl->jhdr->jhdr_size; + } else if (jnl->jhdr->start > jnl->jhdr->end) { + free_space_offset = jnl->jhdr->start - jnl->jhdr->end; + } else { + // journal is completely empty + free_space_offset = jnl->jhdr->size - jnl->jhdr->jhdr_size; + } + + return free_space_offset; +} + + +// +// The journal must be locked on entry to this function. +// The "desired_size" is in bytes. +// +static int +check_free_space(journal *jnl, int desired_size, boolean_t *delayed_header_write, uint32_t sequence_num) +{ + size_t i; + int counter=0; + + //printf("jnl: check free space (desired 0x%x, avail 0x%Lx)\n", + // desired_size, free_space(jnl)); + + if (delayed_header_write) + *delayed_header_write = FALSE; + + while (1) { + int old_start_empty; + + // make sure there's space in the journal to hold this transaction + if (free_space(jnl) > desired_size && jnl->old_start[0] == 0) { + break; + } + if (counter++ == 5000) { + dump_journal(jnl); + panic("jnl: check_free_space: buffer flushing isn't working " + "(jnl @ %p s %lld e %lld f %lld [active start %lld]).\n", jnl, + jnl->jhdr->start, jnl->jhdr->end, free_space(jnl), jnl->active_start); + } + if (counter > 7500) { + printf("jnl: %s: check_free_space: giving up waiting for free space.\n", jnl->jdev_name); + return ENOSPC; + } + + // + // here's where we lazily bump up jnl->jhdr->start. we'll consume + // entries until there is enough space for the next transaction. 
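+		// (A note on the convention used here: each old_start[] slot holds
+		// the journal start offset of an earlier transaction, and its high
+		// bit (0x8000000000000000) stays set while that transaction is
+		// still being flushed; the loop below waits for the bit to clear
+		// before consuming the slot and advancing jhdr->start.)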
+ // + old_start_empty = 1; + lock_oldstart(jnl); + + for (i = 0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) { + int lcl_counter; + + lcl_counter = 0; + while (jnl->old_start[i] & 0x8000000000000000LL) { + if (lcl_counter++ > 10000) { + panic("jnl: check_free_space: tr starting @ 0x%llx not flushing (jnl %p).\n", + jnl->old_start[i], jnl); + } + + unlock_oldstart(jnl); + if (jnl->flush) { + jnl->flush(jnl->flush_arg); + } + tsleep((caddr_t)jnl, PRIBIO, "check_free_space1", 1); + lock_oldstart(jnl); + } + + if (jnl->old_start[i] == 0) { + continue; + } + + old_start_empty = 0; + jnl->jhdr->start = jnl->old_start[i]; + jnl->old_start[i] = 0; + + if (free_space(jnl) > desired_size) { + + if (delayed_header_write) + *delayed_header_write = TRUE; + else { + unlock_oldstart(jnl); + write_journal_header(jnl, 1, sequence_num); + lock_oldstart(jnl); + } + break; + } + } + unlock_oldstart(jnl); + + // if we bumped the start, loop and try again + if (i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0])) { + continue; + } else if (old_start_empty) { + // + // if there is nothing in old_start anymore then we can + // bump the jhdr->start to be the same as active_start + // since it is possible there was only one very large + // transaction in the old_start array. if we didn't do + // this then jhdr->start would never get updated and we + // would wind up looping until we hit the panic at the + // start of the loop. + // + jnl->jhdr->start = jnl->active_start; + + if (delayed_header_write) + *delayed_header_write = TRUE; + else + write_journal_header(jnl, 1, sequence_num); + continue; + } + + + // if the file system gave us a flush function, call it to so that + // it can flush some blocks which hopefully will cause some transactions + // to complete and thus free up space in the journal. + if (jnl->flush) { + jnl->flush(jnl->flush_arg); + } + + // wait for a while to avoid being cpu-bound (this will + // put us to sleep for 10 milliseconds) + tsleep((caddr_t)jnl, PRIBIO, "check_free_space2", 1); + } + + return 0; +} + +/* + * Allocate a new active transaction. + */ +static errno_t +journal_allocate_transaction(journal *jnl) +{ + transaction *tr; + boolean_t was_vm_privileged = FALSE; + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * the disk driver can allocate memory on this path... + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + tr = hfs_mallocz(sizeof(transaction)); + + tr->tbuffer_size = jnl->tbuffer_size; + + tr->tbuffer = hfs_malloc(tr->tbuffer_size); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + // journal replay code checksum check depends on this. 
+ memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE); + // Fill up the rest of the block with unimportant bytes (0x5a 'Z' chosen for visibility) + memset(tr->tbuffer + BLHDR_CHECKSUM_SIZE, 0x5a, jnl->jhdr->blhdr_size - BLHDR_CHECKSUM_SIZE); + + tr->blhdr = (block_list_header *)tr->tbuffer; + tr->blhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(block_info)) - 1; + tr->blhdr->num_blocks = 1; // accounts for this header block + tr->blhdr->bytes_used = jnl->jhdr->blhdr_size; + tr->blhdr->flags = BLHDR_CHECK_CHECKSUMS | BLHDR_FIRST_HEADER; + + tr->sequence_num = ++jnl->sequence_num; + tr->num_blhdrs = 1; + tr->total_bytes = jnl->jhdr->blhdr_size; + tr->jnl = jnl; + + jnl->active_tr = tr; + + return 0; +} + +int +journal_start_transaction(journal *jnl) +{ + int ret; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + if (jnl->owner == current_thread()) { + if (jnl->active_tr == NULL) { + panic("jnl: start_tr: active_tr is NULL (jnl @ %p, owner %p, current_thread %p\n", + jnl, jnl->owner, current_thread()); + } + jnl->nested_count++; + return 0; + } + + journal_lock(jnl); + + if (jnl->nested_count != 0 || jnl->active_tr != NULL) { + panic("jnl: start_tr: owner %p, nested count %d, active_tr %p jnl @ %p\n", + jnl->owner, jnl->nested_count, jnl->active_tr, jnl); + } + + jnl->nested_count = 1; + +#if JOE + // make sure there's room in the journal + if (free_space(jnl) < jnl->tbuffer_size) { + + KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_START, jnl, 0, 0, 0, 0); + + // this is the call that really waits for space to free up + // as well as updating jnl->jhdr->start + if (check_free_space(jnl, jnl->tbuffer_size, NULL, jnl->sequence_num) != 0) { + printf("jnl: %s: start transaction failed: no space\n", jnl->jdev_name); + ret = ENOSPC; + goto bad_start; + } + KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_END, jnl, 0, 0, 0, 0); + } +#endif + + // if there's a buffered transaction, use it. + if (jnl->cur_tr) { + jnl->active_tr = jnl->cur_tr; + jnl->cur_tr = NULL; + + return 0; + } + + ret = journal_allocate_transaction(jnl); + if (ret) { + goto bad_start; + } + + // printf("jnl: start_tr: owner 0x%x new tr @ 0x%x\n", jnl->owner, jnl->active_tr); + + return 0; + +bad_start: + jnl->nested_count = 0; + journal_unlock(jnl); + + return ret; +} + + +int +journal_modify_block_start(journal *jnl, struct buf *bp) +{ + transaction *tr; + boolean_t was_vm_privileged = FALSE; + + CHECK_JOURNAL(jnl); + + + free_old_stuff(jnl); + + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + + // XXXdbg - for debugging I want this to be true. later it may + // not be necessary. + if ((buf_flags(bp) & B_META) == 0) { + panic("jnl: modify_block_start: bp @ %p is not a meta-data block! (jnl %p)\n", bp, jnl); + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: modify_block_start: called w/out a transaction! 
jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + //printf("jnl: mod block start (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d; total bytes %d)\n", + // bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes); + + // can't allow blocks that aren't an even multiple of the + // underlying block size. + if ((buf_size(bp) % jnl->jhdr->jhdr_size) != 0) { + uint32_t phys_blksz, bad=0; + + if (VNOP_IOCTL(jnl->jdev, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) { + bad = 1; + } else if (phys_blksz != (uint32_t)jnl->jhdr->jhdr_size) { + if (phys_blksz < 512) { + panic("jnl: mod block start: phys blksz %d is too small (%d, %d)\n", + phys_blksz, buf_size(bp), jnl->jhdr->jhdr_size); + } + + if ((buf_size(bp) % phys_blksz) != 0) { + bad = 1; + } else if (phys_blksz < (uint32_t)jnl->jhdr->jhdr_size) { + jnl->jhdr->jhdr_size = phys_blksz; + } else { + // the phys_blksz is now larger... need to realloc the jhdr + char *new_header_buf; + + printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n", + jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz); + new_header_buf = hfs_malloc(phys_blksz); + memcpy(new_header_buf, jnl->header_buf, jnl->header_buf_size); + memset(&new_header_buf[jnl->header_buf_size], 0x18, (phys_blksz - jnl->header_buf_size)); + hfs_free(jnl->header_buf, jnl->header_buf_size); + jnl->header_buf = new_header_buf; + jnl->header_buf_size = phys_blksz; + + jnl->jhdr = (journal_header *)jnl->header_buf; + jnl->jhdr->jhdr_size = phys_blksz; + } + } else { + bad = 1; + } + + if (bad) { + panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n", + buf_size(bp), jnl->jhdr->jhdr_size); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + return -1; + } + } + + // make sure that this transaction isn't bigger than the whole journal + if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) { + panic("jnl: transaction too big (%d >= %lld bytes, bufsize %d, tr %p bp %p)\n", + tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + return -1; + } + +#if DEBUG + const int f = buf_flags(bp); + hfs_assert(!ISSET(f, B_DELWRI) || ISSET(f, B_LOCKED)); +#endif + + buf_setflags(bp, B_LOCKED); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + return 0; +} + +int +journal_modify_block_abort(journal *jnl, struct buf *bp) +{ + transaction *tr; + block_list_header *blhdr; + int i; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + tr = jnl->active_tr; + + // + // if there's no active transaction then we just want to + // call buf_brelse() and return since this is just a block + // that happened to be modified as part of another tr. + // + if (tr == NULL) { + buf_brelse(bp); + return 0; + } + + if (jnl->flags & JOURNAL_INVALID) { + /* Still need to buf_brelse(). Callers assume we consume the bp. */ + buf_brelse(bp); + return EINVAL; + } + + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: modify_block_abort: called w/out a transaction! 
jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + // printf("jnl: modify_block_abort: tr 0x%x bp 0x%x\n", jnl->active_tr, bp); + + // first check if it's already part of this transaction + for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { + for (i = 1; i < blhdr->num_blocks; i++) { + if (bp == blhdr->binfo[i].u.bp) { + break; + } + } + + if (i < blhdr->num_blocks) { + break; + } + } + + // + // if blhdr is null, then this block has only had modify_block_start + // called on it as part of the current transaction. that means that + // it is ok to clear the LOCKED bit since it hasn't actually been + // modified. if blhdr is non-null then modify_block_end was called + // on it and so we need to keep it locked in memory. + // + if (blhdr == NULL) { + buf_clearflags(bp, B_LOCKED); + } + + buf_brelse(bp); + return 0; +} + + +int +journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, void *arg), void *arg) +{ + int i = 1; + int tbuffer_offset=0; + block_list_header *blhdr, *prev=NULL; + transaction *tr; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + if (jnl->flags & JOURNAL_INVALID) { + /* Still need to buf_brelse(). Callers assume we consume the bp. */ + buf_brelse(bp); + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: modify_block_end: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + //printf("jnl: mod block end: (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d, total bytes %d)\n", + // bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes); + + if ((buf_flags(bp) & B_LOCKED) == 0) { + panic("jnl: modify_block_end: bp %p not locked! jnl @ %p\n", bp, jnl); + } + + // first check if it's already part of this transaction + for (blhdr = tr->blhdr; blhdr; prev = blhdr, blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { + tbuffer_offset = jnl->jhdr->blhdr_size; + + for (i = 1; i < blhdr->num_blocks; i++) { + if (bp == blhdr->binfo[i].u.bp) { + break; + } + if (blhdr->binfo[i].bnum != (off_t)-1) { + tbuffer_offset += buf_size(blhdr->binfo[i].u.bp); + } else { + tbuffer_offset += blhdr->binfo[i].u.bi.bsize; + } + } + + if (i < blhdr->num_blocks) { + break; + } + } + + if (blhdr == NULL + && prev + && (prev->num_blocks+1) <= prev->max_blocks + && (prev->bytes_used+buf_size(bp)) <= (uint32_t)tr->tbuffer_size) { + blhdr = prev; + + } else if (blhdr == NULL) { + block_list_header *nblhdr; + if (prev == NULL) { + panic("jnl: modify block end: no way man, prev == NULL?!?, jnl %p, bp %p\n", jnl, bp); + } + + // we got to the end of the list, didn't find the block and there's + // no room in the block_list_header pointed to by prev + + // we allocate another tbuffer and link it in at the end of the list + // through prev->binfo[0].bnum. that's a skanky way to do things but + // avoids having yet another linked list of small data structures to manage. + + nblhdr = hfs_malloc(tr->tbuffer_size); + + // journal replay code checksum check depends on this. 
+ memset(nblhdr, 0, BLHDR_CHECKSUM_SIZE); + // Fill up the rest of the block with unimportant bytes + memset(nblhdr + BLHDR_CHECKSUM_SIZE, 0x5a, jnl->jhdr->blhdr_size - BLHDR_CHECKSUM_SIZE); + + // initialize the new guy + nblhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(block_info)) - 1; + nblhdr->num_blocks = 1; // accounts for this header block + nblhdr->bytes_used = jnl->jhdr->blhdr_size; + nblhdr->flags = BLHDR_CHECK_CHECKSUMS; + + tr->num_blhdrs++; + tr->total_bytes += jnl->jhdr->blhdr_size; + + // then link him in at the end + prev->binfo[0].bnum = (off_t)((long)nblhdr); + + // and finally switch to using the new guy + blhdr = nblhdr; + tbuffer_offset = jnl->jhdr->blhdr_size; + i = 1; + } + + + if ((i+1) > blhdr->max_blocks) { + panic("jnl: modify_block_end: i = %d, max_blocks %d\n", i, blhdr->max_blocks); + } + + // if this is true then this is a new block we haven't seen + if (i >= blhdr->num_blocks) { + int bsize; + vnode_t vp; + + vp = buf_vnode(bp); + if (vnode_ref(vp)) { + // Nobody checks the return values, so... + jnl->flags |= JOURNAL_INVALID; + + buf_brelse(bp); + + // We're probably here due to a force unmount, so EIO is appropriate + return EIO; + } + + bsize = buf_size(bp); + + blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); + blhdr->binfo[i].u.bp = bp; + + KERNEL_DEBUG_CONSTANT(0x3018004, kdebug_vnode(vp), blhdr->binfo[i].bnum, bsize, 0, 0); + /* + * Update the per-task logical counter for metadata write. + * We use (2 * bsize) to account for the write to the journal and the + * corresponding write to the btree. + */ + task_update_logical_writes(current_task(), (2 * bsize), TASK_WRITE_METADATA, vp); + + if (func) { + void (*old_func)(buf_t, void *)=NULL, *old_arg=NULL; + + buf_setfilter(bp, func, arg, &old_func, &old_arg); + if (old_func != NULL && old_func != func) { + panic("jnl: modify_block_end: old func %p / arg %p (func %p)", old_func, old_arg, func); + } + } + + blhdr->bytes_used += bsize; + tr->total_bytes += bsize; + + blhdr->num_blocks++; + } + buf_bdwrite(bp); + + return 0; +} + +int +journal_kill_block(journal *jnl, struct buf *bp) +{ + int i; + int bflags; + block_list_header *blhdr; + transaction *tr; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + if (jnl->flags & JOURNAL_INVALID) { + buf_brelse(bp); + return 0; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl->owner != current_thread()) { + panic("jnl: modify_block_end: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + bflags = buf_flags(bp); + + if ( !(bflags & B_LOCKED)) + panic("jnl: modify_block_end: called with bp not B_LOCKED"); + + /* + * bp must be BL_BUSY and B_LOCKED + * first check if it's already part of this transaction + */ + for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { + + for (i = 1; i < blhdr->num_blocks; i++) { + if (bp == blhdr->binfo[i].u.bp) { + vnode_t vp; + + buf_clearflags(bp, B_LOCKED); + + // this undoes the vnode_ref() in journal_modify_block_end() + vp = buf_vnode(bp); + vnode_rele_ext(vp, 0, 1); + + // if the block has the DELWRI and FILTER bits sets, then + // things are seriously weird. if it was part of another + // transaction then journal_modify_block_start() should + // have force it to be written. + // + //if ((bflags & B_DELWRI) && (bflags & B_FILTER)) { + // panic("jnl: kill block: this defies all logic! 
bp 0x%x\n", bp); + //} else { + tr->num_killed += buf_size(bp); + //} + blhdr->binfo[i].bnum = (off_t)-1; + blhdr->binfo[i].u.bp = NULL; + blhdr->binfo[i].u.bi.bsize = buf_size(bp); + + buf_markinvalid(bp); + buf_brelse(bp); + + return 0; + } + } + } + + /* + * We did not find the block in any transaction buffer but we still + * need to release it or else it will be left locked forever. + */ + buf_brelse(bp); + + return 0; +} + +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_set_callback +; +; Function: Provide the journal with a routine to be called back when a +; TRIM has (or would have) been issued to the device. That +; is, the transaction has been flushed to the device, and the +; blocks freed by the transaction are now safe for reuse. +; +; CAUTION: If the journal becomes invalid (eg., due to an I/O +; error when trying to write to the journal), this callback +; will stop getting called, even if extents got freed before +; the journal became invalid! +; +; Input Arguments: +; jnl - The journal structure for the filesystem. +; callback - The function to call when the TRIM is complete. +; arg - An argument to be passed to callback. +;________________________________________________________________________________ +*/ +void +journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg) +{ + jnl->trim_callback = callback; + jnl->trim_callback_arg = arg; +} + + +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_realloc +; +; Function: Increase the amount of memory allocated for the list of extents +; to be unmapped (trimmed). This routine will be called when +; adding an extent to the list, and the list already occupies +; all of the space allocated to it. This routine returns ENOMEM +; if unable to allocate more space, or 0 if the extent list was +; grown successfully. +; +; Input Arguments: +; trim - The trim list to be resized. +; +; Output: +; (result) - ENOMEM or 0. +; +; Side effects: +; The allocated_count and extents fields of tr->trim are updated +; if the function returned 0. +;________________________________________________________________________________ +*/ +static int +trim_realloc(journal *jnl, struct jnl_trim_list *trim) +{ + void *new_extents; + uint32_t new_allocated_count; + boolean_t was_vm_privileged = FALSE; + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, obfuscate_addr(trim), 0, trim->allocated_count, trim->extent_count, 0); + + new_allocated_count = trim->allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS; + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + new_extents = hfs_malloc(new_allocated_count * sizeof(dk_extent_t)); + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + if (new_extents == NULL) { + printf("jnl: trim_realloc: unable to grow extent list!\n"); + /* + * Since we could be called when allocating space previously marked + * to be trimmed, we need to empty out the list to be safe. 
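+		 * (dropping pending trims only costs us some UNMAPs; leaving a stale,
+		 * overlapping extent in the list could let live data be trimmed later,
+		 * so emptying the list is the conservative choice)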
+ */ + trim->extent_count = 0; + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_END, ENOMEM, 0, trim->allocated_count, 0, 0); + return ENOMEM; + } + + /* Copy the old extent list to the newly allocated list. */ + if (trim->extents != NULL) { + memmove(new_extents, + trim->extents, + trim->allocated_count * sizeof(dk_extent_t)); + hfs_free(trim->extents, trim->allocated_count * sizeof(dk_extent_t)); + } + + trim->allocated_count = new_allocated_count; + trim->extents = new_extents; + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_END, 0, 0, new_allocated_count, trim->extent_count, 0); + + return 0; +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: trim_search_extent + ; + ; Function: Search the given extent list to see if any of its extents + ; overlap the given extent. + ; + ; Input Arguments: + ; trim - The trim list to be searched. + ; offset - The first byte of the range to be searched for. + ; length - The number of bytes of the extent being searched for. + ; overlap_start - start of the overlapping extent + ; overlap_len - length of the overlapping extent + ; + ; Output: + ; (result) - TRUE if one or more extents overlap, FALSE otherwise. + ;________________________________________________________________________________ + */ +static int +trim_search_extent(struct jnl_trim_list *trim, uint64_t offset, + uint64_t length, uint64_t *overlap_start, uint64_t *overlap_len) +{ + uint64_t end = offset + length; + uint32_t lower = 0; /* Lowest index to search */ + uint32_t upper = trim->extent_count; /* Highest index to search + 1 */ + uint32_t middle; + + /* A binary search over the extent list. */ + while (lower < upper) { + middle = (lower + upper) / 2; + + if (trim->extents[middle].offset >= end) + upper = middle; + else if (trim->extents[middle].offset + trim->extents[middle].length <= offset) + lower = middle + 1; + else { + if (overlap_start) { + *overlap_start = trim->extents[middle].offset; + } + if (overlap_len) { + *overlap_len = trim->extents[middle].length; + } + return TRUE; + } + } + + return FALSE; +} + + +/* +;________________________________________________________________________________ +; +; Routine: journal_trim_add_extent +; +; Function: Keep track of extents that have been freed as part of this +; transaction. If the underlying device supports TRIM (UNMAP), +; then those extents will be trimmed/unmapped once the +; transaction has been written to the journal. (For example, +; SSDs can support trim/unmap and avoid having to recopy those +; blocks when doing wear leveling, and may reuse the same +; phsyical blocks for different logical blocks.) +; +; HFS also uses this, in combination with journal_trim_set_callback, +; to add recently freed extents to its free extent cache, but +; only after the transaction that freed them is committed to +; disk. (This reduces the chance of overwriting live data in +; a way that causes data loss if a transaction never gets +; written to the journal.) +; +; Input Arguments: +; jnl - The journal for the volume containing the byte range. +; offset - The first byte of the range to be trimmed. +; length - The number of bytes of the extent being trimmed. 
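+;
+; Output:
+;	(result)	- 0 on success, EINVAL if the journal is invalid, or
+;			  ENOMEM if the extent list could not be grown.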
+;________________________________________________________________________________ +*/ +int +journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length) +{ + uint64_t end; + transaction *tr; + dk_extent_t *extent; + uint32_t insert_index; + uint32_t replace_count; + + CHECK_JOURNAL(jnl); + + /* TODO: Is it OK to manipulate the trim list even if JOURNAL_INVALID is set? I think so... */ + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_START, obfuscate_addr(jnl), offset, length, tr->trim.extent_count, 0); + + if (jnl->owner != current_thread()) { + panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + free_old_stuff(jnl); + + end = offset + length; + + /* + * Find the range of existing extents that can be combined with the + * input extent. We start by counting the number of extents that end + * strictly before the input extent, then count the number of extents + * that overlap or are contiguous with the input extent. + */ + extent = tr->trim.extents; + insert_index = 0; + while (insert_index < tr->trim.extent_count && extent->offset + extent->length < offset) { + ++insert_index; + ++extent; + } + replace_count = 0; + while (insert_index + replace_count < tr->trim.extent_count && extent->offset <= end) { + ++replace_count; + ++extent; + } + + /* + * If none of the existing extents can be combined with the input extent, + * then just insert it in the list (before item number insert_index). + */ + if (replace_count == 0) { + /* If the list was already full, we need to grow it. */ + if (tr->trim.extent_count == tr->trim.allocated_count) { + if (trim_realloc(jnl, &tr->trim) != 0) { + printf("jnl: trim_add_extent: out of memory!"); + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, ENOMEM, 0, 0, tr->trim.extent_count, 0); + return ENOMEM; + } + } + + /* Shift any existing extents with larger offsets. */ + if (insert_index < tr->trim.extent_count) { + memmove(&tr->trim.extents[insert_index+1], + &tr->trim.extents[insert_index], + (tr->trim.extent_count - insert_index) * sizeof(dk_extent_t)); + } + tr->trim.extent_count++; + + /* Store the new extent in the list. */ + tr->trim.extents[insert_index].offset = offset; + tr->trim.extents[insert_index].length = length; + + /* We're done. */ + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, 0, 0, 0, tr->trim.extent_count, 0); + return 0; + } + + /* + * Update extent number insert_index to be the union of the input extent + * and all of the replaced extents. + */ + if (tr->trim.extents[insert_index].offset < offset) + offset = tr->trim.extents[insert_index].offset; + extent = &tr->trim.extents[insert_index + replace_count - 1]; + if (extent->offset + extent->length > end) + end = extent->offset + extent->length; + tr->trim.extents[insert_index].offset = offset; + tr->trim.extents[insert_index].length = end - offset; + + /* + * If we were replacing more than one existing extent, then shift any + * extents with larger offsets, and update the count of extents. + * + * We're going to leave extent #insert_index alone since it was just updated, above. + * We need to move extents from index (insert_index + replace_count) through the end of + * the list by (replace_count - 1) positions so that they overwrite extent #(insert_index + 1). 
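+	 *
+	 * For example, with five extents A B C D E, insert_index = 1 and replace_count = 3:
+	 * B, C and D have already been collapsed into the single union extent now stored at
+	 * index 1; the memmove below slides E down to index 2, and extent_count drops from 5 to 3.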
+ */ + if (replace_count > 1 && (insert_index + replace_count) < tr->trim.extent_count) { + memmove(&tr->trim.extents[insert_index + 1], + &tr->trim.extents[insert_index + replace_count], + (tr->trim.extent_count - insert_index - replace_count) * sizeof(dk_extent_t)); + } + tr->trim.extent_count -= replace_count - 1; + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, 0, 0, 0, tr->trim.extent_count, 0); + return 0; +} + +/* + * journal_trim_extent_overlap + * + * Return 1 if there are any pending TRIMs that overlap with the given offset and length + * Return 0 otherwise. + */ + +int journal_trim_extent_overlap (journal *jnl, uint64_t offset, uint64_t length, uint64_t *end) { + transaction *tr = NULL; + int overlap = 0; + + uint64_t overlap_start; + uint64_t overlap_len; + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + /* + * There are two lists that need to be examined for potential overlaps: + * + * The first is the current transaction. Since this function requires that + * a transaction be active when this is called, this is the "active_tr" + * pointer in the journal struct. This has a trimlist pointer which needs + * to be searched. + */ + overlap = trim_search_extent (&tr->trim, offset, length, &overlap_start, &overlap_len); + if (overlap == 0) { + /* + * The second is the async trim list, which is only done if the current + * transaction group (active transaction) did not overlap with our target + * extent. This async trim list is the set of all previously + * committed transaction groups whose I/Os are now in-flight. We need to hold the + * trim lock in order to search this list. If we grab the list before the + * TRIM has completed, then we will compare it. If it is grabbed AFTER the + * TRIM has completed, then the pointer will be zeroed out and we won't have + * to check anything. + */ + lck_rw_lock_shared (&jnl->trim_lock); + if (jnl->async_trim != NULL) { + overlap = trim_search_extent(jnl->async_trim, offset, length, &overlap_start, &overlap_len); + } + lck_rw_unlock_shared (&jnl->trim_lock); + } + + if (overlap) { + /* compute the end (min) of the overlapping range */ + if ( (overlap_start + overlap_len) < (offset + length)) { + *end = (overlap_start + overlap_len); + } + else { + *end = (offset + length); + } + } + + + return overlap; +} + +/* + * journal_request_immediate_flush + * + * FS requests that the journal flush immediately upon the + * active transaction's completion. + * + * Returns 0 if operation succeeds + * Returns EPERM if we failed to leave hint + */ +int +journal_request_immediate_flush (journal *jnl) { + + transaction *tr = NULL; + /* + * Is a transaction still in process? You must do + * this while there are txns open + */ + tr = jnl->active_tr; + if (tr != NULL) { + CHECK_TRANSACTION(tr); + tr->flush_on_completion = TRUE; + } + else { + return EPERM; + } + return 0; +} + + + +/* +;________________________________________________________________________________ +; +; Routine: trim_remove_extent +; +; Function: Indicate that a range of bytes, some of which may have previously +; been passed to journal_trim_add_extent, is now allocated. +; Any overlapping ranges currently in the journal's trim list will +; be removed. If the underlying device supports TRIM (UNMAP), then +; these extents will not be trimmed/unmapped when the transaction +; is written to the journal. 
+; +; HFS also uses this to prevent newly allocated space from being +; added to its free extent cache (if some portion of the newly +; allocated space was recently freed). +; +; Input Arguments: +; trim - The trim list to update. +; offset - The first byte of the range to be trimmed. +; length - The number of bytes of the extent being trimmed. +;________________________________________________________________________________ +*/ +static int +trim_remove_extent(journal *jnl, struct jnl_trim_list *trim, uint64_t offset, uint64_t length) +{ + u_int64_t end; + dk_extent_t *extent; + u_int32_t keep_before; + u_int32_t keep_after; + + end = offset + length; + + /* + * Find any existing extents that start before or end after the input + * extent. These extents will be modified if they overlap the input + * extent. Other extents between them will be deleted. + */ + extent = trim->extents; + keep_before = 0; + while (keep_before < trim->extent_count && extent->offset < offset) { + ++keep_before; + ++extent; + } + keep_after = keep_before; + if (keep_after > 0) { + /* See if previous extent extends beyond both ends of input extent. */ + --keep_after; + --extent; + } + while (keep_after < trim->extent_count && (extent->offset + extent->length) <= end) { + ++keep_after; + ++extent; + } + + /* + * When we get here, the first keep_before extents (0 .. keep_before-1) + * start before the input extent, and extents (keep_after .. extent_count-1) + * end after the input extent. We'll need to keep, all of those extents, + * but possibly modify #(keep_before-1) and #keep_after to remove the portion + * that overlaps with the input extent. + */ + + /* + * Does the input extent start after and end before the same existing + * extent? If so, we have to "punch a hole" in that extent and convert + * it to two separate extents. + */ + if (keep_before > keep_after) { + /* If the list was already full, we need to grow it. */ + if (trim->extent_count == trim->allocated_count) { + if (trim_realloc(jnl, trim) != 0) { + printf("jnl: trim_remove_extent: out of memory!"); + return ENOMEM; + } + } + + /* + * Make room for a new extent by shifting extents #keep_after and later + * down by one extent. When we're done, extents #keep_before and + * #keep_after will be identical, and we can fall through to removing + * the portion that overlaps the input extent. + */ + memmove(&trim->extents[keep_before], + &trim->extents[keep_after], + (trim->extent_count - keep_after) * sizeof(dk_extent_t)); + ++trim->extent_count; + ++keep_after; + + /* + * Fall through. We now have the case where the length of extent + * #(keep_before - 1) needs to be updated, and the start of extent + * #(keep_after) needs to be updated. + */ + } + + /* + * May need to truncate the end of extent #(keep_before - 1) if it overlaps + * the input extent. + */ + if (keep_before > 0) { + extent = &trim->extents[keep_before - 1]; + if (extent->offset + extent->length > offset) { + extent->length = offset - extent->offset; + } + } + + /* + * May need to update the start of extent #(keep_after) if it overlaps the + * input extent. + */ + if (keep_after < trim->extent_count) { + extent = &trim->extents[keep_after]; + if (extent->offset < end) { + extent->length = extent->offset + extent->length - end; + extent->offset = end; + } + } + + /* + * If there were whole extents that overlapped the input extent, get rid + * of them by shifting any following extents, and updating the count. 
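+	 *
+	 * For example, with keep_before = 2 and keep_after = 4, extents #2 and #3 lie
+	 * entirely inside the input extent; extent #4 and any later extents slide down
+	 * to index 2, and extent_count shrinks by two.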
+ */ + if (keep_after > keep_before && keep_after < trim->extent_count) { + memmove(&trim->extents[keep_before], + &trim->extents[keep_after], + (trim->extent_count - keep_after) * sizeof(dk_extent_t)); + } + trim->extent_count -= keep_after - keep_before; + + return 0; +} + +/* + ;________________________________________________________________________________ + ; + ; Routine: journal_trim_remove_extent + ; + ; Function: Make note of a range of bytes, some of which may have previously + ; been passed to journal_trim_add_extent, is now in use on the + ; volume. The given bytes will be not be trimmed as part of + ; this transaction, or a pending trim of a transaction being + ; asynchronously flushed. + ; + ; Input Arguments: + ; jnl - The journal for the volume containing the byte range. + ; offset - The first byte of the range to be trimmed. + ; length - The number of bytes of the extent being trimmed. + ;________________________________________________________________________________ + */ +int +journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) +{ + int error = 0; + transaction *tr; + + CHECK_JOURNAL(jnl); + + /* TODO: Is it OK to manipulate the trim list even if JOURNAL_INVALID is set? I think so... */ + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_START, obfuscate_addr(jnl), offset, length, tr->trim.extent_count, 0); + + if (jnl->owner != current_thread()) { + panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + free_old_stuff(jnl); + + error = trim_remove_extent(jnl, &tr->trim, offset, length); + if (error == 0) { + int found = FALSE; + + /* + * See if a pending trim has any extents that overlap with the + * one we were given. + */ + lck_rw_lock_shared(&jnl->trim_lock); + if (jnl->async_trim != NULL) + found = trim_search_extent(jnl->async_trim, offset, length, NULL, NULL); + lck_rw_unlock_shared(&jnl->trim_lock); + + if (found) { + /* + * There was an overlap, so avoid trimming the extent we + * just allocated. (Otherwise, it might get trimmed after + * we've written to it, which will cause that data to be + * corrupted.) + */ + uint32_t async_extent_count = 0; + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_START, obfuscate_addr(jnl), offset, length, 0, 0); + lck_rw_lock_exclusive(&jnl->trim_lock); + if (jnl->async_trim != NULL) { + error = trim_remove_extent(jnl, jnl->async_trim, offset, length); + async_extent_count = jnl->async_trim->extent_count; + } + lck_rw_unlock_exclusive(&jnl->trim_lock); + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_END, error, 0, 0, async_extent_count, 0); + } + } + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_END, error, 0, 0, tr->trim.extent_count, 0); + return error; +} + + +static int +journal_trim_flush(journal *jnl, transaction *tr) +{ + int err = 0; + boolean_t was_vm_privileged = FALSE; + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, obfuscate_addr(jnl), tr, 0, tr->trim.extent_count, 0); + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * the disk driver can allocate memory on this path... 
+ * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + lck_rw_lock_shared(&jnl->trim_lock); + if (tr->trim.extent_count > 0) { + dk_unmap_t unmap; + + bzero(&unmap, sizeof(unmap)); + if (jnl->flags & JOURNAL_USE_UNMAP) { + unmap.extents = tr->trim.extents; + unmap.extentsCount = tr->trim.extent_count; + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_START, obfuscate_addr(jnl), tr, 0, tr->trim.extent_count, 0); + err = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel()); + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_END, err, 0, 0, 0, 0); + } + + /* + * Call back into the file system to tell them that we have + * trimmed some extents and that they can now be reused. + * + * CAUTION: If the journal becomes invalid (eg., due to an I/O + * error when trying to write to the journal), this callback + * will stop getting called, even if extents got freed before + * the journal became invalid! + */ + if (jnl->trim_callback) + jnl->trim_callback(jnl->trim_callback_arg, tr->trim.extent_count, tr->trim.extents); + } + lck_rw_unlock_shared(&jnl->trim_lock); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + /* + * If the transaction we're flushing was the async transaction, then + * tell the current transaction that there is no pending trim + * any more. + * + * NOTE: Since we released the lock, another thread could have + * removed one or more extents from our list. That's not a + * problem since any writes to the re-allocated blocks + * would get sent to the device after the DKIOCUNMAP. + */ + lck_rw_lock_exclusive(&jnl->trim_lock); + if (jnl->async_trim == &tr->trim) + jnl->async_trim = NULL; + lck_rw_unlock_exclusive(&jnl->trim_lock); + + /* + * By the time we get here, no other thread can discover the address + * of "tr", so it is safe for us to manipulate tr->trim without + * holding any locks. + */ + if (tr->trim.extents) { + hfs_free(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t)); + tr->trim.allocated_count = 0; + tr->trim.extent_count = 0; + tr->trim.extents = NULL; + } + + if (jnl_kdebug) + KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_END, err, 0, 0, 0, 0); + + return err; +} + +static int +journal_binfo_cmp(const void *a, const void *b) +{ + const block_info *bi_a = (const struct block_info *)a; + const block_info *bi_b = (const struct block_info *)b; + daddr64_t res; + + if (bi_a->bnum == (off_t)-1) { + return 1; + } + if (bi_b->bnum == (off_t)-1) { + return -1; + } + + // don't have to worry about negative block + // numbers so this is ok to do. + // + res = (buf_blkno(bi_a->u.bp) - buf_blkno(bi_b->u.bp)); + + return (int)res; +} + + +/* + * End a transaction. If the transaction is small enough, and we're not forcing + * a write to disk, the "active" transaction becomes the "current" transaction, + * and will be reused for the next transaction that is started (group commit). + * + * If the transaction gets written to disk (because force_it is true, or no + * group commit, or the transaction is sufficiently full), the blocks get + * written into the journal first, then the are written asynchronously. 
When + * those async writes complete, the transaction can be freed and removed from + * the journal. + * + * An optional callback can be supplied. If given, it is called after the + * the blocks have been written to the journal, but before the async writes + * of those blocks to their normal on-disk locations. This is used by + * journal_relocate so that the location of the journal can be changed and + * flushed to disk before the blocks get written to their normal locations. + * Note that the callback is only called if the transaction gets written to + * the journal during this end_transaction call; you probably want to set the + * force_it flag. + * + * Inputs: + * tr Transaction to add to the journal + * force_it If true, force this transaction to the on-disk journal immediately. + * callback See description above. Pass NULL for no callback. + * callback_arg Argument passed to callback routine. + * + * Result + * 0 No errors + * -1 An error occurred. The journal is marked invalid. + */ +static int +end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg, boolean_t drop_lock, boolean_t must_wait) +{ + block_list_header *blhdr=NULL, *next=NULL; + int i, ret_val = 0; + errno_t err; + journal *jnl = tr->jnl; + struct buf *bp; + size_t tbuffer_offset; + boolean_t drop_lock_early; + + if (jnl->cur_tr) { + panic("jnl: jnl @ %p already has cur_tr %p, new tr: %p\n", + jnl, jnl->cur_tr, tr); + } + + // if there weren't any modified blocks in the transaction + // just save off the transaction pointer and return. + if (tr->total_bytes == jnl->jhdr->blhdr_size) { + jnl->cur_tr = tr; + goto done; + } + + // if our transaction buffer isn't very full, just hang + // on to it and don't actually flush anything. this is + // what is known as "group commit". we will flush the + // transaction buffer if it's full or if we have more than + // one of them so we don't start hogging too much memory. + // + // We also check the device supports UNMAP/TRIM, and if so, + // the number of extents waiting to be trimmed. If it is + // small enough, then keep accumulating more (so we can + // reduce the overhead of trimming). If there was a prior + // trim error, then we stop issuing trims for this + // volume, so we can also coalesce transactions. + // + if ( force_it == 0 + && (jnl->flags & JOURNAL_NO_GROUP_COMMIT) == 0 + && tr->num_blhdrs < 3 + && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8)) + && (!(jnl->flags & JOURNAL_USE_UNMAP) || (tr->trim.extent_count < jnl_trim_flush_limit))) { + + jnl->cur_tr = tr; + goto done; + } + + KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_START, jnl, tr, drop_lock, must_wait, 0); + + lock_condition(jnl, &jnl->flushing, "end_transaction"); + + /* + * if the previous 'finish_end_transaction' was being run + * asynchronously, it could have encountered a condition + * that caused it to mark the journal invalid... if that + * occurred while we were waiting for it to finish, we + * need to notice and abort the current transaction + */ + if ((jnl->flags & JOURNAL_INVALID) || jnl->flush_aborted == TRUE) { + unlock_condition(jnl, &jnl->flushing); + + abort_transaction(jnl, tr); + ret_val = -1; + KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0); + goto done; + } + + /* + * Store a pointer to this transaction's trim list so that + * future transactions can find it. 
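+	 * (journal_trim_extent_overlap and journal_trim_remove_extent consult
+	 * jnl->async_trim, under the trim_lock, while this transaction's I/O
+	 * is still in flight)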
+ * + * Note: if there are no extents in the trim list, then don't + * bother saving the pointer since nothing can add new extents + * to the list (and other threads/transactions only care if + * there is a trim pending). + */ + lck_rw_lock_exclusive(&jnl->trim_lock); + if (jnl->async_trim != NULL) + panic("jnl: end_transaction: async_trim already non-NULL!"); + if (tr->trim.extent_count > 0) + jnl->async_trim = &tr->trim; + lck_rw_unlock_exclusive(&jnl->trim_lock); + + /* + * snapshot the transaction sequence number while we are still behind + * the journal lock since it will be bumped upon the start of the + * next transaction group which may overlap the current journal flush... + * we pass the snapshot into write_journal_header during the journal + * flush so that it can write the correct version in the header... + * because we hold the 'flushing' condition variable for the duration + * of the journal flush, 'saved_sequence_num' remains stable + */ + jnl->saved_sequence_num = jnl->sequence_num; + + /* + * if we're here we're going to flush the transaction buffer to disk. + * 'check_free_space' will not return untl there is enough free + * space for this transaction in the journal and jnl->old_start[0] + * is avaiable for use + */ + KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_START, jnl, 0, 0, 0, 0); + + check_free_space(jnl, tr->total_bytes, &tr->delayed_header_write, jnl->saved_sequence_num); + + KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_END, jnl, tr->delayed_header_write, 0, 0, 0); + + // range check the end index + if (jnl->jhdr->end <= 0 || jnl->jhdr->end > jnl->jhdr->size) { + panic("jnl: end_transaction: end is bogus 0x%llx (sz 0x%llx)\n", + jnl->jhdr->end, jnl->jhdr->size); + } + if (tr->delayed_header_write == TRUE) { + thread_t thread = THREAD_NULL; + + lock_condition(jnl, &jnl->writing_header, "end_transaction"); + /* + * fire up a thread to write the journal header + * asynchronously... when it finishes, it will call + * unlock_condition... we can overlap the preparation of + * the log and buffers during this time + */ + kernel_thread_start((thread_continue_t)write_header_thread, jnl, &thread); + } else + jnl->write_header_failed = FALSE; + + + // this transaction starts where the current journal ends + tr->journal_start = jnl->jhdr->end; + + lock_oldstart(jnl); + /* + * Because old_start is locked above, we can cast away the volatile qualifier before passing it to memcpy. + * slide everyone else down and put our latest guy in the last + * entry in the old_start array + */ + memcpy(__CAST_AWAY_QUALIFIER(&jnl->old_start[0], volatile, void *), __CAST_AWAY_QUALIFIER(&jnl->old_start[1], volatile, void *), sizeof(jnl->old_start)-sizeof(jnl->old_start[0])); + jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] = tr->journal_start | 0x8000000000000000LL; + + unlock_oldstart(jnl); + + + for (blhdr = tr->blhdr; blhdr; blhdr = next) { + char *blkptr; + buf_t sbp; + int32_t bsize; + + tbuffer_offset = jnl->jhdr->blhdr_size; + + for (i = 1; i < blhdr->num_blocks; i++) { + + if (blhdr->binfo[i].bnum != (off_t)-1) { + void (*func)(buf_t, void *); + void *arg; + + bp = blhdr->binfo[i].u.bp; + + if (bp == NULL) { + panic("jnl: inconsistent binfo (NULL bp w/bnum %lld; jnl @ %p, tr %p)\n", + blhdr->binfo[i].bnum, jnl, tr); + } + /* + * acquire the bp here so that we can safely + * mess around with its data. buf_acquire() + * will return EAGAIN if the buffer was busy, + * so loop trying again. 
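+			 * once acquired, the buffer's contents are copied into the
+			 * transaction buffer and a shadow buf_t is created below, so the
+			 * original can be marked clean and released back to the cache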
+ */ + do { + err = buf_acquire(bp, BAC_REMOVE, 0, 0); + } while (err == EAGAIN); + + if (err) + panic("could not acquire bp %p (err %d)\n", bp, err); + + if ((buf_flags(bp) & (B_LOCKED|B_DELWRI)) != (B_LOCKED|B_DELWRI)) { + if (jnl->flags & JOURNAL_CLOSE_PENDING) { + buf_clearflags(bp, B_LOCKED); + buf_brelse(bp); + + /* + * this is an odd case that appears to happen occasionally + * make sure we mark this block as no longer valid + * so that we don't process it in "finish_end_transaction" since + * the bp that is recorded in our array no longer belongs + * to us (normally we substitute a shadow bp to be processed + * issuing a 'buf_bawrite' on a stale buf_t pointer leads + * to all kinds of problems. + */ + blhdr->binfo[i].bnum = (off_t)-1; + continue; + } else { + panic("jnl: end_tr: !!!DANGER!!! bp %p flags (0x%x) not LOCKED & DELWRI\n", bp, buf_flags(bp)); + } + } + bsize = buf_size(bp); + + buf_setfilter(bp, NULL, NULL, &func, &arg); + + blkptr = (char *)&((char *)blhdr)[tbuffer_offset]; + + sbp = buf_create_shadow_priv(bp, FALSE, (uintptr_t)blkptr, 0, 0); + + if (sbp == NULL) + panic("jnl: buf_create_shadow returned NULL"); + + /* + * copy the data into the transaction buffer... + */ + memcpy(blkptr, (char *)buf_dataptr(bp), bsize); + + buf_clearflags(bp, B_LOCKED); + buf_markclean(bp); + buf_drop(bp); + + /* + * adopt the shadow buffer for this block + */ + if (func) { + /* + * transfer FS hook function to the + * shadow buffer... it will get called + * in finish_end_transaction + */ + buf_setfilter(sbp, func, arg, NULL, NULL); + } + blhdr->binfo[i].u.bp = sbp; + + } else { + // bnum == -1, only true if a block was "killed" + bsize = blhdr->binfo[i].u.bi.bsize; + } + tbuffer_offset += bsize; + } + next = (block_list_header *)((long)blhdr->binfo[0].bnum); + } + /* + * if callback != NULL, we don't want to drop the journal + * lock, or complete end_transaction asynchronously, since + * the caller is expecting the callback to run in the calling + * context + * + * if drop_lock == FALSE, we can't complete end_transaction + * asynchronously + */ + if (callback) + drop_lock_early = FALSE; + else + drop_lock_early = drop_lock; + + if (drop_lock_early == FALSE) + must_wait = TRUE; + + if (drop_lock_early == TRUE) { + journal_unlock(jnl); + drop_lock = FALSE; + } + if (must_wait == TRUE) + ret_val = finish_end_transaction(tr, callback, callback_arg); + else { + thread_t thread = THREAD_NULL; + + /* + * fire up a thread to complete processing this transaction + * asynchronously... 
when it finishes, it will call + * unlock_condition + */ + kernel_thread_start((thread_continue_t)finish_end_thread, tr, &thread); + } + KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0); +done: + if (drop_lock == TRUE) { + journal_unlock(jnl); + } + return (ret_val); +} + + +static void +finish_end_thread(transaction *tr) +{ + throttle_set_thread_io_policy(IOPOL_PASSIVE); + + finish_end_transaction(tr, NULL, NULL); + + thread_deallocate(current_thread()); + thread_terminate(current_thread()); +} + +static void +write_header_thread(journal *jnl) +{ + throttle_set_thread_io_policy(IOPOL_PASSIVE); + + if (write_journal_header(jnl, 1, jnl->saved_sequence_num)) + jnl->write_header_failed = TRUE; + else + jnl->write_header_failed = FALSE; + unlock_condition(jnl, &jnl->writing_header); + + thread_deallocate(current_thread()); + thread_terminate(current_thread()); +} + +static int +finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callback_arg) +{ + int i, amt; + int ret = 0; + off_t end; + journal *jnl = tr->jnl; + buf_t bp, *bparray; + vnode_t vp; + block_list_header *blhdr=NULL, *next=NULL; + size_t tbuffer_offset; + int bufs_written = 0; + int ret_val = 0; + boolean_t was_vm_privileged = FALSE; + + KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_START, jnl, tr, 0, 0, 0); + + if (vfs_isswapmount(jnl->fsmount)) { + /* + * if we block waiting for memory, and there is enough pressure to + * cause us to try and create a new swap file, we may end up deadlocking + * due to waiting for the journal on the swap file creation path... + * by making ourselves vm_privileged, we give ourselves the best chance + * of not blocking + */ + was_vm_privileged = set_vm_privilege(TRUE); + } + end = jnl->jhdr->end; + + for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) { + + amt = blhdr->bytes_used; + + blhdr->binfo[0].u.bi.b.sequence_num = tr->sequence_num; + + blhdr->checksum = 0; + blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); + + bparray = hfs_malloc(blhdr->num_blocks * sizeof(buf_t)); + tbuffer_offset = jnl->jhdr->blhdr_size; + + for (i = 1; i < blhdr->num_blocks; i++) { + void (*func)(buf_t, void *); + void *arg; + int32_t bsize; + + /* + * finish preparing the shadow buf_t before + * calculating the individual block checksums + */ + if (blhdr->binfo[i].bnum != (off_t)-1) { + daddr64_t blkno; + daddr64_t lblkno; + + bp = blhdr->binfo[i].u.bp; + + vp = buf_vnode(bp); + blkno = buf_blkno(bp); + lblkno = buf_lblkno(bp); + + if (vp == NULL && lblkno == blkno) { + printf("jnl: %s: end_tr: bad news! buffer w/null vp and l/blkno = %qd/%qd. aborting the transaction.\n", + jnl->jdev_name, lblkno, blkno); + ret_val = -1; + goto bad_journal; + } + + // if the lblkno is the same as blkno and this bp isn't + // associated with the underlying file system device then + // we need to call bmap() to get the actual physical block. 
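+			// (hfs_vnop_blktooff and hfs_vnop_blockmap below perform that translation,
+			// and the run is verified to be physically contiguous before the physical
+			// block number is recorded in binfo)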
+ // + if ((lblkno == blkno) && (vp != jnl->fsdev)) { + off_t f_offset; + size_t contig_bytes; + + if (hfs_vnop_blktooff(&(struct vnop_blktooff_args){ + .a_vp = vp, + .a_lblkno = lblkno, + .a_offset = &f_offset + })) { + printf("jnl: %s: end_tr: vnop_blktooff failed\n", jnl->jdev_name); + ret_val = -1; + goto bad_journal; + } + + if (hfs_vnop_blockmap(&(struct vnop_blockmap_args) { + .a_vp = vp, + .a_foffset = f_offset, + .a_size = buf_count(bp), + .a_bpn = &blkno, + .a_run = &contig_bytes + })) { + printf("jnl: %s: end_tr: can't blockmap the buffer\n", jnl->jdev_name); + ret_val = -1; + goto bad_journal; + } + + if ((uint32_t)contig_bytes < buf_count(bp)) { + printf("jnl: %s: end_tr: blk not physically contiguous on disk\n", jnl->jdev_name); + ret_val = -1; + goto bad_journal; + } + buf_setblkno(bp, blkno); + } + // update this so we write out the correct physical block number! + blhdr->binfo[i].bnum = (off_t)(blkno); + + /* + * pick up the FS hook function (if any) and prepare + * to fire this buffer off in the next pass + */ + buf_setfilter(bp, buffer_flushed_callback, tr, &func, &arg); + + if (func) { + /* + * call the hook function supplied by the filesystem... + * this needs to happen BEFORE cacl_checksum in case + * the FS morphs the data in the buffer + */ + func(bp, arg); + } + bparray[i] = bp; + bsize = buf_size(bp); + blhdr->binfo[i].u.bi.bsize = bsize; + blhdr->binfo[i].u.bi.b.cksum = calc_checksum(&((char *)blhdr)[tbuffer_offset], bsize); + } else { + bparray[i] = NULL; + bsize = blhdr->binfo[i].u.bi.bsize; + blhdr->binfo[i].u.bi.b.cksum = 0; + } + tbuffer_offset += bsize; + } + /* + * if we fired off the journal_write_header asynchronously in + * 'end_transaction', we need to wait for its completion + * before writing the actual journal data + */ + wait_condition(jnl, &jnl->writing_header, "finish_end_transaction"); + + if (jnl->write_header_failed == FALSE) + ret = write_journal_data(jnl, &end, blhdr, amt); + else + ret_val = -1; + /* + * put the bp pointers back so that we can + * make the final pass on them + */ + for (i = 1; i < blhdr->num_blocks; i++) + blhdr->binfo[i].u.bp = bparray[i]; + + hfs_free(bparray, blhdr->num_blocks * sizeof(buf_t)); + + if (ret_val == -1) + goto bad_journal; + + if (ret != amt) { + printf("jnl: %s: end_transaction: only wrote %d of %d bytes to the journal!\n", + jnl->jdev_name, ret, amt); + + ret_val = -1; + goto bad_journal; + } + } + jnl->jhdr->end = end; // update where the journal now ends + tr->journal_end = end; // the transaction ends here too + + if (tr->journal_start == 0 || tr->journal_end == 0) { + panic("jnl: end_transaction: bad tr journal start/end: 0x%llx 0x%llx\n", + tr->journal_start, tr->journal_end); + } + + if (write_journal_header(jnl, 0, jnl->saved_sequence_num) != 0) { + ret_val = -1; + goto bad_journal; + } + /* + * If the caller supplied a callback, call it now that the blocks have been + * written to the journal. This is used by journal_relocate so, for example, + * the file system can change its pointer to the new journal. + */ + if (callback != NULL && callback(callback_arg) != 0) { + ret_val = -1; + goto bad_journal; + } + + // + // Send a DKIOCUNMAP for the extents trimmed by this transaction, and + // free up the extent list. + // + journal_trim_flush(jnl, tr); + + // the buffer_flushed_callback will only be called for the + // real blocks that get flushed so we have to account for + // the block_list_headers here. 
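+	// (no buf_bawrite is issued for the block_list_headers themselves, so their
+	// bytes are credited to num_flushed up front, before the per-block async
+	// writes are issued below)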
+ // + tr->num_flushed = tr->num_blhdrs * jnl->jhdr->blhdr_size; + + lock_condition(jnl, &jnl->asyncIO, "finish_end_transaction"); + + // + // setup for looping through all the blhdr's. + // + for (blhdr = tr->blhdr; blhdr; blhdr = next) { + uint16_t num_blocks; + + /* + * grab this info ahead of issuing the buf_bawrites... + * once the last one goes out, its possible for blhdr + * to be freed (especially if we get preempted) before + * we do the last check of num_blocks or + * grab the next blhdr pointer... + */ + next = (block_list_header *)((long)blhdr->binfo[0].bnum); + num_blocks = blhdr->num_blocks; + + /* + * we can re-order the buf ptrs because everything is written out already + */ + kx_qsort(&blhdr->binfo[1], num_blocks-1, sizeof(block_info), journal_binfo_cmp); + + /* + * need to make sure that the loop issuing the buf_bawrite's + * does not touch blhdr once the last buf_bawrite has been + * issued... at that point, we no longer have a legitmate + * reference on the associated storage since it will be + * released upon the completion of that last buf_bawrite + */ + for (i = num_blocks-1; i >= 1; i--) { + if (blhdr->binfo[i].bnum != (off_t)-1) + break; + num_blocks--; + } + for (i = 1; i < num_blocks; i++) { + + if ((bp = blhdr->binfo[i].u.bp)) { + vp = buf_vnode(bp); + + buf_bawrite(bp); + + // this undoes the vnode_ref() in journal_modify_block_end() + vnode_rele_ext(vp, 0, 1); + + bufs_written++; + } + } + } + if (bufs_written == 0) { + /* + * since we didn't issue any buf_bawrite's, there is no + * async trigger to cause the memory associated with this + * transaction to be freed... so, move it to the garbage + * list now + */ + lock_oldstart(jnl); + + tr->next = jnl->tr_freeme; + jnl->tr_freeme = tr; + + unlock_oldstart(jnl); + + unlock_condition(jnl, &jnl->asyncIO); + } + + //printf("jnl: end_tr: tr @ 0x%x, jnl-blocks: 0x%llx - 0x%llx. exit!\n", + // tr, tr->journal_start, tr->journal_end); + +bad_journal: + if (ret_val == -1) { + abort_transaction(jnl, tr); // cleans up list of extents to be trimmed + + /* + * 'flush_aborted' is protected by the flushing condition... we need to + * set it before dropping the condition so that it will be + * noticed in 'end_transaction'... we add this additional + * aborted condition so that we can drop the 'flushing' condition + * before grabbing the journal lock... this avoids a deadlock + * in 'end_transaction' which is holding the journal lock while + * waiting for the 'flushing' condition to clear... 
+ * everyone else will notice the JOURNAL_INVALID flag + */ + jnl->flush_aborted = TRUE; + + unlock_condition(jnl, &jnl->flushing); + journal_lock(jnl); + + jnl->flags |= JOURNAL_INVALID; + jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL; + + journal_unlock(jnl); + } else + unlock_condition(jnl, &jnl->flushing); + + if (vfs_isswapmount(jnl->fsmount) && (was_vm_privileged == FALSE)) + set_vm_privilege(FALSE); + + KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_END, jnl, tr, bufs_written, ret_val, 0); + + return (ret_val); +} + + +static void +lock_condition(journal *jnl, boolean_t *condition, const char *condition_name) +{ + + KERNEL_DEBUG(0xbbbbc020|DBG_FUNC_START, jnl, condition, 0, 0, 0); + + lock_flush(jnl); + + while (*condition == TRUE) + msleep(condition, &jnl->flock, PRIBIO, condition_name, NULL); + + *condition = TRUE; + unlock_flush(jnl); + + KERNEL_DEBUG(0xbbbbc020|DBG_FUNC_END, jnl, condition, 0, 0, 0); +} + +static void +wait_condition(journal *jnl, boolean_t *condition, const char *condition_name) +{ + + if (*condition == FALSE) + return; + + KERNEL_DEBUG(0xbbbbc02c|DBG_FUNC_START, jnl, condition, 0, 0, 0); + + lock_flush(jnl); + + while (*condition == TRUE) + msleep(condition, &jnl->flock, PRIBIO, condition_name, NULL); + + unlock_flush(jnl); + + KERNEL_DEBUG(0xbbbbc02c|DBG_FUNC_END, jnl, condition, 0, 0, 0); +} + +static void +unlock_condition(journal *jnl, boolean_t *condition) +{ + lock_flush(jnl); + + *condition = FALSE; + wakeup(condition); + + unlock_flush(jnl); +} + +static void +abort_transaction(journal *jnl, transaction *tr) +{ + block_list_header *blhdr, *next; + + // for each block list header, iterate over the blocks then + // free up the memory associated with the block list. + // + // find each of the primary blocks (i.e. the list could + // contain a mix of shadowed and real buf_t's depending + // on when the abort condition was detected) and mark them + // clean and locked in the cache... this at least allows + // the FS a consistent view between it's incore data structures + // and the meta-data held in the cache + // + KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_START, jnl, tr, 0, 0, 0); + + for (blhdr = tr->blhdr; blhdr; blhdr = next) { + int i; + + for (i = 1; i < blhdr->num_blocks; i++) { + buf_t bp, tbp, sbp; + vnode_t bp_vp; + errno_t err; + + if (blhdr->binfo[i].bnum == (off_t)-1) + continue; + + tbp = blhdr->binfo[i].u.bp; + + bp_vp = buf_vnode(tbp); + + if (buf_shadow(tbp)) { + sbp = tbp; + buf_setfilter(tbp, NULL, NULL, NULL, NULL); + } else { + hfs_assert(ISSET(buf_flags(tbp), B_LOCKED)); + + sbp = NULL; + + do { + err = buf_acquire(tbp, BAC_REMOVE, 0, 0); + } while (err == EAGAIN); + + if (!err) { + buf_setfilter(tbp, NULL, NULL, NULL, NULL); + buf_brelse(tbp); + } + } + + if (bp_vp) { + err = buf_meta_bread(bp_vp, + buf_lblkno(tbp), + buf_size(tbp), + NOCRED, + &bp); + if (err == 0) { + if (sbp == NULL && bp != tbp && (buf_flags(tbp) & B_LOCKED)) { + panic("jnl: abort_tr: got back a different bp! 
(bp %p should be %p, jnl %p\n", + bp, tbp, jnl); + } + /* + * once the journal has been marked INVALID and aborted, + * NO meta data can be written back to the disk, so + * mark the buf_t clean and make sure it's locked in the cache + * note: if we found a shadow, the real buf_t needs to be relocked + */ + buf_setflags(bp, B_LOCKED); + buf_markclean(bp); + buf_brelse(bp); + + KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_NONE, jnl, tr, bp, 0, 0); + + /* + * this undoes the vnode_ref() in journal_modify_block_end() + */ + vnode_rele_ext(bp_vp, 0, 1); + } else { + printf("jnl: %s: abort_tr: could not find block %lld for vnode!\n", + jnl->jdev_name, blhdr->binfo[i].bnum); + if (bp) { + buf_brelse(bp); + } + } + } + if (sbp) + buf_brelse(sbp); + } + next = (block_list_header *)((long)blhdr->binfo[0].bnum); + + // we can free blhdr here since we won't need it any more + blhdr->binfo[0].bnum = 0xdeadc0de; + hfs_free(blhdr, tr->tbuffer_size); + } + + /* + * If the transaction we're aborting was the async transaction, then + * tell the current transaction that there is no pending trim + * any more. + */ + lck_rw_lock_exclusive(&jnl->trim_lock); + if (jnl->async_trim == &tr->trim) + jnl->async_trim = NULL; + lck_rw_unlock_exclusive(&jnl->trim_lock); + + + if (tr->trim.extents) { + hfs_free(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t)); + } + tr->trim.allocated_count = 0; + tr->trim.extent_count = 0; + tr->trim.extents = NULL; + tr->tbuffer = NULL; + tr->blhdr = NULL; + tr->total_bytes = 0xdbadc0de; + hfs_free(tr, sizeof(*tr)); + + KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_END, jnl, tr, 0, 0, 0); +} + + +int +journal_end_transaction(journal *jnl) +{ + int ret; + transaction *tr; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + if ((jnl->flags & JOURNAL_INVALID) && jnl->owner == NULL) { + return 0; + } + + if (jnl->owner != current_thread()) { + panic("jnl: end_tr: I'm not the owner! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + jnl->nested_count--; + + if (jnl->nested_count > 0) { + return 0; + } else if (jnl->nested_count < 0) { + panic("jnl: jnl @ %p has negative nested count (%d). bad boy.\n", jnl, jnl->nested_count); + } + + if (jnl->flags & JOURNAL_INVALID) { + if (jnl->active_tr) { + if (jnl->cur_tr != NULL) { + panic("jnl: journal @ %p has active tr (%p) and cur tr (%p)\n", + jnl, jnl->active_tr, jnl->cur_tr); + } + tr = jnl->active_tr; + jnl->active_tr = NULL; + + abort_transaction(jnl, tr); + } + journal_unlock(jnl); + + return EINVAL; + } + + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + // clear this out here so that when check_free_space() calls + // the FS flush function, we don't panic in journal_flush() + // if the FS were to call that. note: check_free_space() is + // called from end_transaction(). + // + jnl->active_tr = NULL; + + /* Examine the force-journal-flush state in the active txn */ + if (tr->flush_on_completion == TRUE) { + /* + * If the FS requested it, disallow group commit and force the + * transaction out to disk immediately. + */ + ret = end_transaction(tr, 1, NULL, NULL, TRUE, TRUE); + } + else { + /* in the common path we can simply use the double-buffered journal */ + ret = end_transaction(tr, 0, NULL, NULL, TRUE, FALSE); + } + + return ret; +} + + +/* + * Flush the contents of the journal to the disk. + * + * Input: + * wait_for_IO - + * If TRUE, wait to write in-memory journal to the disk + * consistently, and also wait to write all asynchronous + * metadata blocks to its corresponding locations + * consistently on the disk. 
This means that the journal + * is empty at this point and does not contain any + * transactions. This is overkill in normal scenarios + * but is useful whenever the metadata blocks are required + * to be consistent on-disk instead of just the journal + * being consistent; like before live verification + * and live volume resizing. + * + * If FALSE, only wait to write in-memory journal to the + * disk consistently. This means that the journal still + * contains uncommitted transactions and the file system + * metadata blocks in the journal transactions might be + * written asynchronously to the disk. But there is no + * guarantee that they are written to the disk before + * returning to the caller. Note that this option is + * sufficient for file system data integrity as it + * guarantees consistent journal content on the disk. + */ +int +journal_flush(journal *jnl, journal_flush_options_t options) +{ + boolean_t drop_lock = FALSE; + errno_t error = 0; + uint32_t flush_count = 0; + + CHECK_JOURNAL(jnl); + + free_old_stuff(jnl); + + if (jnl->flags & JOURNAL_INVALID) { + return -1; + } + + KDBG(DBG_JOURNAL_FLUSH | DBG_FUNC_START, jnl); + + if (jnl->owner != current_thread()) { + journal_lock(jnl); + drop_lock = TRUE; + } + + if (ISSET(options, JOURNAL_FLUSH_FULL)) + flush_count = jnl->flush_counter; + + // if we're not active, flush any buffered transactions + if (jnl->active_tr == NULL && jnl->cur_tr) { + transaction *tr = jnl->cur_tr; + + jnl->cur_tr = NULL; + + if (ISSET(options, JOURNAL_WAIT_FOR_IO)) { + wait_condition(jnl, &jnl->flushing, "journal_flush"); + wait_condition(jnl, &jnl->asyncIO, "journal_flush"); + } + /* + * "end_transction" will wait for any current async flush + * to complete, before flushing "cur_tr"... because we've + * specified the 'must_wait' arg as TRUE, it will then + * synchronously flush the "cur_tr" + */ + end_transaction(tr, 1, NULL, NULL, drop_lock, TRUE); // force it to get flushed + + } else { + if (drop_lock == TRUE) { + journal_unlock(jnl); + } + + /* Because of pipelined journal, the journal transactions + * might be in process of being flushed on another thread. + * If there is nothing to flush currently, we should + * synchronize ourselves with the pipelined journal thread + * to ensure that all inflight transactions, if any, are + * flushed before we return success to caller. + */ + wait_condition(jnl, &jnl->flushing, "journal_flush"); + } + if (ISSET(options, JOURNAL_WAIT_FOR_IO)) { + wait_condition(jnl, &jnl->asyncIO, "journal_flush"); + } + + if (ISSET(options, JOURNAL_FLUSH_FULL)) { + + dk_synchronize_t sync_request = { + .options = 0, + }; + + // We need a full cache flush. If it has not been done, do it here. + if (flush_count == jnl->flush_counter) + error = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + + // If external journal partition is enabled, flush filesystem data partition. + if (jnl->jdev != jnl->fsdev) + error = VNOP_IOCTL(jnl->fsdev, DKIOCSYNCHRONIZE, (caddr_t)&sync_request, FWRITE, vfs_context_kernel()); + + } + + KDBG(DBG_JOURNAL_FLUSH | DBG_FUNC_END, jnl); + + return 0; +} + +int +journal_active(journal *jnl) +{ + if (jnl->flags & JOURNAL_INVALID) { + return -1; + } + + return (jnl->active_tr == NULL) ? 0 : 1; +} + +void * +journal_owner(journal *jnl) +{ + return jnl->owner; +} + +int journal_uses_fua(journal *jnl) +{ + if (jnl->flags & JOURNAL_DO_FUA_WRITES) + return 1; + return 0; +} + +/* + * Relocate the journal. 
+ * + * You provide the new starting offset and size for the journal. You may + * optionally provide a new tbuffer_size; passing zero defaults to not + * changing the tbuffer size except as needed to fit within the new journal + * size. + * + * You must have already started a transaction. The transaction may contain + * modified blocks (such as those needed to deallocate the old journal, + * allocate the new journal, and update the location and size of the journal + * in filesystem-private structures). Any transactions prior to the active + * transaction will be flushed to the old journal. The new journal will be + * initialized, and the blocks from the active transaction will be written to + * the new journal. + * + * The caller will need to update the structures that identify the location + * and size of the journal. These updates should be made in the supplied + * callback routine. These updates must NOT go into a transaction. You should + * force these updates to the media before returning from the callback. In the + * even of a crash, either the old journal will be found, with an empty journal, + * or the new journal will be found with the contents of the active transaction. + * + * Upon return from the callback, the blocks from the active transaction are + * written to their normal locations on disk. + * + * (Remember that we have to ensure that blocks get committed to the journal + * before being committed to their normal locations. But the blocks don't count + * as committed until the new journal is pointed at.) + * + * Upon return, there is still an active transaction: newly allocated, and + * with no modified blocks. Call journal_end_transaction as normal. You may + * modifiy additional blocks before calling journal_end_transaction, and those + * blocks will (eventually) go to the relocated journal. + * + * Inputs: + * jnl The (opened) journal to relocate. + * offset The new journal byte offset (from start of the journal device). + * journal_size The size, in bytes, of the new journal. + * tbuffer_size The new desired transaction buffer size. Pass zero to keep + * the same size as the current journal. The size will be + * modified as needed to fit the new journal. + * callback Routine called after the new journal has been initialized, + * and the active transaction written to the new journal, but + * before the blocks are written to their normal locations. + * Pass NULL for no callback. + * callback_arg An argument passed to the callback routine. + * + * Result: + * 0 No errors + * EINVAL The offset is not block aligned + * EINVAL The journal_size is not a multiple of the block size + * EINVAL The journal is invalid + * (any) An error returned by journal_flush. + * + */ +int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbuffer_size, + errno_t (*callback)(void *), void *callback_arg) +{ + int ret; + transaction *tr; + size_t i = 0; + + /* + * Sanity check inputs, and adjust the size of the transaction buffer. 
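+	 * (both the new offset and the new journal size must be multiples of the
+	 * journal block size, jhdr_size)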
+ */ + if (jnl->jhdr->jhdr_size == 0) { + printf("jnl: %s: relocate: bad jhdr size (%d)\n", jnl->jdev_name, jnl->jhdr->jhdr_size); + return EINVAL; + } + + if ((offset % jnl->jhdr->jhdr_size) != 0) { + printf("jnl: %s: relocate: offset 0x%llx is not an even multiple of block size 0x%x\n", + jnl->jdev_name, offset, jnl->jhdr->jhdr_size); + return EINVAL; + } + if ((journal_size % jnl->jhdr->jhdr_size) != 0) { + printf("jnl: %s: relocate: journal size 0x%llx is not an even multiple of block size 0x%x\n", + jnl->jdev_name, journal_size, jnl->jhdr->jhdr_size); + return EINVAL; + } + + CHECK_JOURNAL(jnl); + + /* Guarantee we own the active transaction. */ + if (jnl->flags & JOURNAL_INVALID) { + return EINVAL; + } + if (jnl->owner != current_thread()) { + panic("jnl: relocate: Not the owner! jnl %p, owner %p, curact %p\n", + jnl, jnl->owner, current_thread()); + } + + if (tbuffer_size == 0) + tbuffer_size = jnl->tbuffer_size; + size_up_tbuffer(jnl, tbuffer_size, jnl->jhdr->jhdr_size); + + /* + * Flush any non-active transactions. We have to temporarily hide the + * active transaction to make journal_flush flush out non-active but + * current (unwritten) transactions. + */ + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + jnl->active_tr = NULL; + ret = journal_flush(jnl, JOURNAL_WAIT_FOR_IO); + jnl->active_tr = tr; + + if (ret) { + return ret; + } + wait_condition(jnl, &jnl->flushing, "end_transaction"); + + /* + * At this point, we have completely flushed the contents of the current + * journal to disk (and have asynchronously written all of the txns to + * their actual desired locations). As a result, we can (and must) clear + * out the old_start array. If we do not, then if the last written transaction + * started at the beginning of the journal (starting 1 block into the + * journal file) it could confuse the buffer_flushed callback. This is + * because we're about to reset the start/end pointers of the journal header + * below. + */ + lock_oldstart(jnl); + for (i = 0; i < sizeof (jnl->old_start) / sizeof(jnl->old_start[0]); i++) { + jnl->old_start[i] = 0; + } + unlock_oldstart(jnl); + + /* Update the journal's offset and size in memory. */ + jnl->jdev_offset = offset; + jnl->jhdr->start = jnl->jhdr->end = jnl->jhdr->jhdr_size; + jnl->jhdr->size = journal_size; + jnl->active_start = jnl->jhdr->start; + + /* + * Force the active transaction to be written to the new journal. Call the + * supplied callback after the blocks have been written to the journal, but + * before they get written to their normal on-disk locations. + */ + jnl->active_tr = NULL; + ret = end_transaction(tr, 1, callback, callback_arg, FALSE, TRUE); + if (ret) { + printf("jnl: %s: relocate: end_transaction failed (%d)\n", jnl->jdev_name, ret); + goto bad_journal; + } + + /* + * Create a new, empty transaction to be the active transaction. This way + * our caller can use journal_end_transaction as usual. + */ + ret = journal_allocate_transaction(jnl); + if (ret) { + printf("jnl: %s: relocate: could not allocate new transaction (%d)\n", jnl->jdev_name, ret); + goto bad_journal; + } + + return 0; + +bad_journal: + jnl->flags |= JOURNAL_INVALID; + abort_transaction(jnl, tr); + return ret; +} + +uint32_t journal_current_txn(journal *jnl) +{ + return jnl->sequence_num + (jnl->active_tr || jnl->cur_tr ? 0 : 1); +} diff --git a/core/hfs_journal.h b/core/hfs_journal.h new file mode 100644 index 0000000..ff8b851 --- /dev/null +++ b/core/hfs_journal.h @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * This header contains the structures and function prototypes + * for the vfs journaling code. The data types are not meant + * to be modified by user code. Just use the functions and do + * not mess around with the structs. + */ +#ifndef HFS_JOURNAL_H_ +#define HFS_JOURNAL_H_ + +#include +#include + +#ifdef __APPLE_API_UNSTABLE + +#include +#include +#include + + +typedef struct _blk_info { + int32_t bsize; + union { + int32_t cksum; + uint32_t sequence_num; + } b; +} _blk_info; + +typedef struct block_info { + off_t bnum; // block # on the file system device + union { + _blk_info bi; + struct buf *bp; + } u; +} __attribute__((__packed__)) block_info; + +typedef struct block_list_header { + u_int16_t max_blocks; // max number of blocks in this chunk + u_int16_t num_blocks; // number of valid block numbers in block_nums + int32_t bytes_used; // how many bytes of this tbuffer are used + uint32_t checksum; // on-disk: checksum of this header and binfo[0] + int32_t flags; // check-checksums, initial blhdr, etc + block_info binfo[1]; // so we can reference them by name +} block_list_header; + +#define BLHDR_CHECK_CHECKSUMS 0x0001 +#define BLHDR_FIRST_HEADER 0x0002 + + +struct journal; + +struct jnl_trim_list { + uint32_t allocated_count; + uint32_t extent_count; + dk_extent_t *extents; +}; + +typedef void (*jnl_trim_callback_t)(void *arg, uint32_t extent_count, const dk_extent_t *extents); + +typedef struct transaction { + int tbuffer_size; // in bytes + char *tbuffer; // memory copy of the transaction + block_list_header *blhdr; // points to the first byte of tbuffer + int num_blhdrs; // how many buffers we've allocated + int total_bytes; // total # of bytes in transaction + int num_flushed; // how many bytes have been flushed + int num_killed; // how many bytes were "killed" + off_t journal_start; // where in the journal this transaction starts + off_t journal_end; // where in the journal this transaction ends + struct journal *jnl; // ptr back to the journal structure + struct transaction *next; // list of tr's (either completed or to be free'd) + uint32_t sequence_num; + struct jnl_trim_list trim; + boolean_t delayed_header_write; + boolean_t flush_on_completion; //flush transaction immediately upon txn end. 
+} transaction;
+
+
+/*
+ * This is written to block zero of the journal and it
+ * maintains overall state about the journal.
+ */
+typedef struct journal_header {
+	int32_t		magic;
+	int32_t		endian;
+	volatile off_t	start;		// zero-based byte offset of the start of the first transaction
+	volatile off_t	end;		// zero-based byte offset of where free space begins
+	off_t		size;		// size in bytes of the entire journal
+	int32_t		blhdr_size;	// size in bytes of each block_list_header in the journal
+	uint32_t	checksum;
+	int32_t		jhdr_size;	// block size (in bytes) of the journal header
+	uint32_t	sequence_num;	// NEW FIELD: a monotonically increasing value assigned to all txn's
+} journal_header;
+
+#define JOURNAL_HEADER_MAGIC	0x4a4e4c78	// 'JNLx'
+#define ENDIAN_MAGIC		0x12345678
+
+//
+// we only checksum the original size of the journal_header to remain
+// backwards compatible.  The size of the original journal_header is
+// everything up to the sequence_num field, hence we use the
+// offsetof macro to calculate the size.
+//
+#define JOURNAL_HEADER_CKSUM_SIZE	(offsetof(struct journal_header, sequence_num))
+
+#define OLD_JOURNAL_HEADER_MAGIC	0x4a484452	// 'JHDR'
+
+
+/*
+ * In-memory structure about the journal.
+ */
+typedef struct journal {
+	lck_mtx_t	jlock;			// protects the struct journal data
+	lck_mtx_t	flock;			// serializes flushing of journal
+	lck_rw_t	trim_lock;		// protects the async_trim field, below
+
+
+	struct vnode	*jdev;			// vnode of the device where the journal lives
+	off_t		jdev_offset;		// byte offset to the start of the journal
+	const char	*jdev_name;
+
+	struct vnode	*fsdev;			// vnode of the file system device
+	struct mount	*fsmount;		// mount of the file system
+
+	void		(*flush)(void *arg);	// fs callback to flush meta data blocks
+	void		*flush_arg;		// arg that's passed to flush()
+
+	int32_t		flags;
+	int32_t		tbuffer_size;		// default transaction buffer size
+	boolean_t	flush_aborted;
+	boolean_t	flushing;
+	boolean_t	asyncIO;
+	boolean_t	writing_header;
+	boolean_t	write_header_failed;
+
+	struct jnl_trim_list *async_trim;	// extents to be trimmed by transaction being asynchronously flushed
+	jnl_trim_callback_t trim_callback;
+	void		*trim_callback_arg;
+
+	char		*header_buf;		// in-memory copy of the journal header
+	int32_t		header_buf_size;
+	journal_header	*jhdr;			// points to the first byte of header_buf
+
+	uint32_t	saved_sequence_num;
+	uint32_t	sequence_num;
+
+	off_t		max_read_size;
+	off_t		max_write_size;
+
+	transaction	*cur_tr;		// for group-commit
+	transaction	*completed_trs;		// out-of-order transactions that completed
+	transaction	*active_tr;		// for nested transactions
+	int32_t		nested_count;		// for nested transactions
+	void		*owner;			// a ptr that's unique to the calling process
+
+	transaction	*tr_freeme;		// transaction structs that need to be free'd
+
+	volatile off_t	active_start;		// the active start that we only keep in memory
+	lck_mtx_t	old_start_lock;		// protects the old_start
+	volatile off_t	old_start[16];		// this is how we do lazy start update
+
+	int		last_flush_err;		// last error from flushing the cache
+	uint32_t	flush_counter;		// a monotonically increasing value assigned on track cache flush
+} journal;
+
+/* internal-only journal flags (top 16 bits) */
+#define JOURNAL_CLOSE_PENDING	0x00010000
+#define JOURNAL_INVALID		0x00020000
+#define JOURNAL_FLUSHCACHE_ERR	0x00040000	// means we already printed this err
+#define JOURNAL_NEED_SWAP	0x00080000	// swap any data read from disk
+#define JOURNAL_DO_FUA_WRITES	0x00100000	// do force-unit-access writes
+#define JOURNAL_USE_UNMAP	0x00200000	// device supports UNMAP (TRIM)
+#define JOURNAL_FEATURE_BARRIER	0x00400000	// device supports barrier-only flush
+
+
+/* journal_open/create options are always in the low-16 bits */
+#define JOURNAL_OPTION_FLAGS_MASK	0x0000ffff
+
+__BEGIN_DECLS
+/*
+ * Prototypes.
+ */
+
+/*
+ * Call journal_init() to initialize the journaling code (sets up lock attributes)
+ */
+void journal_init(void);
+
+/*
+ * Call journal_create() to create a new journal.  You only
+ * call this once, typically at file system creation time.
+ *
+ * The "jvp" argument is the vnode where the journal is written.
+ * The journal starts at "offset" and is "journal_size" bytes long.
+ *
+ * The "fsvp" argument is the vnode of your file system.  It may be
+ * the same as "jvp".
+ *
+ * The "min_fs_block_size" argument is the minimum block size
+ * (in bytes) that the file system will ever write.  Typically
+ * this is the block size of the file system (1k, 4k, etc) but
+ * on HFS+ it is the minimum block size of the underlying device.
+ *
+ * The flags argument lets you disable group commit if you
+ * want tighter guarantees on transactions (in exchange for
+ * lower performance).
+ *
+ * The tbuffer_size is the size of the transaction buffer
+ * used by the journal.  If you specify zero, the journal code
+ * will use a reasonable default.  The tbuffer_size should
+ * be an integer multiple of the min_fs_block_size.
+ *
+ * Returns a valid journal pointer or NULL if one could not
+ * be created.
+ */
+journal *journal_create(struct vnode *jvp,
+			off_t offset,
+			off_t journal_size,
+			struct vnode *fsvp,
+			size_t min_fs_block_size,
+			int32_t flags,
+			int32_t tbuffer_size,
+			void (*flush)(void *arg),
+			void *arg,
+			struct mount *fsmount);
+
+/*
+ * Call journal_open() when mounting an existing file system
+ * that has a previously created journal.  It will take care
+ * of validating the journal and replaying it if necessary.
+ *
+ * See journal_create() for a description of the arguments.
+ *
+ * Returns a valid journal pointer or NULL if it runs into
+ * trouble reading/playing back the journal.
+ */
+journal *journal_open(struct vnode *jvp,
+		      off_t offset,
+		      off_t journal_size,
+		      struct vnode *fsvp,
+		      size_t min_fs_block_size,
+		      int32_t flags,
+		      int32_t tbuffer_size,
+		      void (*flush)(void *arg),
+		      void *arg,
+		      struct mount *fsmount);
+
+/*
+ * Test whether the journal is clean or not.  This is intended
+ * to be used when you're mounting read-only.  If the journal
+ * is not clean for some reason then you should not mount the
+ * volume as your data structures may be in an unknown state.
+ */
+int journal_is_clean(struct vnode *jvp,
+		     off_t offset,
+		     off_t journal_size,
+		     struct vnode *fsvp,
+		     size_t min_fs_block_size);
+
+
+/*
+ * Call journal_close() just before your file system is unmounted.
+ * It flushes any outstanding transactions and makes sure the
+ * journal is in a consistent state.
+ */
+void journal_close(journal *journalp);
+
+/*
+ * flags for journal_create/open.  Only the low 16 bits can be
+ * used for flags because the internal bits go in the high 16.
+ */
+#define JOURNAL_NO_GROUP_COMMIT	0x00000001
+#define JOURNAL_RESET		0x00000002
+
+/*
+ * Transaction related functions.
+ *
+ * Before you start modifying file system meta data, you
+ * should call journal_start_transaction().  Then before
+ * you modify each block, call journal_modify_block_start()
+ * and when you're done, journal_modify_block_end().
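+ *
+ * As an illustrative sketch (error handling omitted; "bp" stands for a
+ * metadata buffer the caller already holds), the pattern is:
+ *
+ *	journal_start_transaction(jnl);
+ *	journal_modify_block_start(jnl, bp);
+ *	... change the contents of bp ...
+ *	journal_modify_block_end(jnl, bp, NULL, NULL);
+ *	journal_end_transaction(jnl);
+ *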
+ * When you've modified the last block as part of a transaction,
+ * call journal_end_transaction() to commit the changes.
+ *
+ * If you decide to abort the modifications to a block you
+ * should call journal_modify_block_abort().
+ *
+ * If as part of a transaction you want to throw out
+ * any previous copies of a block (because it got deleted)
+ * then call journal_kill_block().  This will mark it so
+ * that the journal does not play it back (effectively
+ * dropping it).
+ *
+ * journal_trim_add_extent() marks a range of bytes on the device which should
+ * be trimmed (invalidated, unmapped).  journal_trim_remove_extent() marks a
+ * range of bytes which should no longer be trimmed.  Accumulated extents
+ * will be trimmed when the transaction is flushed to the on-disk journal.
+ */
+int journal_start_transaction(journal *jnl);
+int journal_modify_block_start(journal *jnl, struct buf *bp);
+int journal_modify_block_abort(journal *jnl, struct buf *bp);
+int journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *bp, void *arg), void *arg);
+int journal_kill_block(journal *jnl, struct buf *bp);
+int journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length);
+int journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length);
+void journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg);
+int journal_trim_extent_overlap(journal *jnl, uint64_t offset, uint64_t length, uint64_t *end);
+/* Mark state in the journal that requests an immediate journal flush upon txn completion */
+int journal_request_immediate_flush(journal *jnl);
+int journal_end_transaction(journal *jnl);
+
+int journal_active(journal *jnl);
+
+typedef enum journal_flush_options {
+	JOURNAL_WAIT_FOR_IO = 0x01,	// Flush journal and metadata blocks, wait for async IO to complete.
+	JOURNAL_FLUSH_FULL  = 0x02,	// Flush track cache to media
+} journal_flush_options_t;
+
+int journal_flush(journal *jnl, journal_flush_options_t options);
+void *journal_owner(journal *jnl);	// compare against current_thread()
+int journal_uses_fua(journal *jnl);
+void journal_lock(journal *jnl);
+void journal_unlock(journal *jnl);
+
+
+/*
+ * Relocate the journal.
+ *
+ * You provide the new starting offset and size for the journal. You may
+ * optionally provide a new tbuffer_size; passing zero defaults to not
+ * changing the tbuffer size except as needed to fit within the new journal
+ * size.
+ *
+ * You must have already started a transaction. The transaction may contain
+ * modified blocks (such as those needed to deallocate the old journal,
+ * allocate the new journal, and update the location and size of the journal
+ * in filesystem-private structures). Any transactions prior to the active
+ * transaction will be flushed to the old journal. The new journal will be
+ * initialized, and the blocks from the active transaction will be written to
+ * the new journal. The caller will need to update the structures that
+ * identify the location and size of the journal from the callback routine.
+ */
+int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbuffer_size,
+	errno_t (*callback)(void *), void *callback_arg);
+
+uint32_t journal_current_txn(journal *jnl);
+
+__END_DECLS
+
+#endif /* __APPLE_API_UNSTABLE */
+#endif /* !HFS_JOURNAL_H_ */
diff --git a/core/hfs_kdebug.h b/core/hfs_kdebug.h
new file mode 100644
index 0000000..827fc4f
--- /dev/null
+++ b/core/hfs_kdebug.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2014 Apple Inc.
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef HFS_KDEBUG_H_ +#define HFS_KDEBUG_H_ + +#include + +/* + * KERNEL_DEBUG related definitions for HFS. + * + * NOTE: The Class DBG_FSYSTEM = 3, and Subclass DBG_HFS = 8, so these + * debug codes are of the form 0x0308nnnn. + */ +#define HFSDBG_CODE(code) FSDBG_CODE(DBG_HFS, code) + +enum { + HFSDBG_WRITE = FSDBG_CODE(DBG_FSRW, 0), /* 0x3010000 */ + HFSDBG_TRUNCATE = FSDBG_CODE(DBG_FSRW, 7), /* 0x301001C */ + HFSDBG_READ = FSDBG_CODE(DBG_FSRW, 12), /* 0x3010030 */ + HFSDBG_GETNEWVNODE = FSDBG_CODE(DBG_FSRW, 37), /* 0x3010094 */ + HFSDBG_UPDATE = FSDBG_CODE(DBG_FSRW, 8192), /* 0x3018000 */ + HFSDBG_UNMAP_FREE = HFSDBG_CODE(0), /* 0x03080000 */ + HFSDBG_UNMAP_ALLOC = HFSDBG_CODE(1), /* 0x03080004 */ + HFSDBG_UNMAP_CALLBACK = HFSDBG_CODE(2), /* 0x03080008 */ + /* 0x0308000C is unused */ + HFSDBG_BLOCK_ALLOCATE = HFSDBG_CODE(4), /* 0x03080010 */ + HFSDBG_BLOCK_DEALLOCATE = HFSDBG_CODE(5), /* 0x03080014 */ + HFSDBG_READ_BITMAP_BLOCK = HFSDBG_CODE(6), /* 0x03080018 */ + HFSDBG_RELEASE_BITMAP_BLOCK = HFSDBG_CODE(7), /* 0x0308001C */ + HFSDBG_FIND_CONTIG_BITMAP = HFSDBG_CODE(8), /* 0x03080020 */ + HFSDBG_ALLOC_ANY_BITMAP = HFSDBG_CODE(9), /* 0x03080024 */ + HFSDBG_ALLOC_FIND_KNOWN = HFSDBG_CODE(10), /* 0x03080028 */ + HFSDBG_MARK_ALLOC_BITMAP = HFSDBG_CODE(11), /* 0x0308002C */ + HFSDBG_MARK_FREE_BITMAP = HFSDBG_CODE(12), /* 0x03080030 */ + HFSDBG_BLOCK_FIND_CONTIG = HFSDBG_CODE(13), /* 0x03080034 */ + HFSDBG_IS_ALLOCATED = HFSDBG_CODE(14), /* 0x03080038 */ + /* 0x0308003C is unused */ + HFSDBG_RESET_EXTENT_CACHE = HFSDBG_CODE(16), /* 0x03080040 */ + HFSDBG_REMOVE_EXTENT_CACHE = HFSDBG_CODE(17), /* 0x03080044 */ + HFSDBG_ADD_EXTENT_CACHE = HFSDBG_CODE(18), /* 0x03080048 */ + HFSDBG_READ_BITMAP_RANGE = HFSDBG_CODE(19), /* 0x0308004C */ + HFSDBG_RELEASE_SCAN_BITMAP = HFSDBG_CODE(20), /* 0x03080050 */ + HFSDBG_SYNCER = HFSDBG_CODE(21), /* 0x03080054 */ + HFSDBG_SYNCER_TIMED = HFSDBG_CODE(22), /* 0x03080058 */ + HFSDBG_UNMAP_SCAN = HFSDBG_CODE(23), /* 0x0308005C */ + HFSDBG_UNMAP_SCAN_TRIM = HFSDBG_CODE(24), /* 0x03080060 */ +}; + +/* + Parameters logged by the above tracepoints: +--------------------------------------------------------------------------------------------------------------------------------- + 
CODE EVENT NAME DBG_FUNC_START arg1, arg2, arg3, arg4, arg5 ... DBG_FUNC_END arg1, arg2, arg3, arg4, arg5 + DBG_FUNC_NONE arg1, arg2, arg3, arg4, arg5 +--------------------------------------------------------------------------------------------------------------------------------- +0x3010000 HFSDBG_WRITE offset, uio_resid, ff_size, filebytes, 0 ... uio_offset, uio_resid, ff_size, filebytes, 0 + offset, uio_resid, ff_size, filebytes, 0 +0x301001C HFSDBG_TRUNCATE length, ff_size, filebytes, 0, 0 ... length, ff_size, filebytes, retval, 0 + length, ff_size, filebytes, 0, 0 +0x3010030 HFSDBG_READ uio_offset, uio_resid, filesize, filebytes, 0 ... uio_offset, uio_resid, filesize, filebytes, 0 +0x3010094 HFSDBG_GETNEWVNODE c_vp, c_rsrc_vp, 0, 0, 0 +0x3018000 HFSDBG_UPDATE vp, tstate, 0, 0, 0 ... vp, tstate, error, 0/-1, 0 + 0 HFSDBG_UNMAP_FREE startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 1 HFSDBG_UNMAP_ALLOC startBlock, blockCount, 0, 0, 0 ... err, 0, 0, 0, 0 + 2 HFSDBG_UNMAP_CALLBACK 0, extentCount, 0, 0, 0 ... 0, 0, 0, 0, 0 + 3 unused + 4 HFSDBG_BLOCK_ALLOCATE startBlock, minBlocks, maxBlocks, flags, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 5 HFSDBG_BLOCK_DEALLOCATE startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0 + 6 HFSDBG_READ_BITMAP_BLOCK startBlock, 0, 0, 0, 0 ... err, 0, 0, 0, 0 + 7 HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 8 HFSDBG_FIND_CONTIG_BITMAP startBlock, minBlocks, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 9 HFSDBG_ALLOC_ANY_BITMAP startBlock, endBlock, maxBlocks, useMeta, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 10 HFSDBG_ALLOC_FIND_KNOWN 0, 0, maxBlocks, 0, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 11 HFSDBG_MARK_ALLOC_BITMAP startBlock, blockCount, flags, 0, 0 ... err, 0, 0, 0, 0 + 12 HFSDBG_MARK_FREE_BITMAP startBlock, blockCount, valid, 0, 0 ... err, 0, 0, 0, 0 + 13 HFSDBG_BLOCK_FIND_CONTIG startBlock, endBlock, minBlocks, maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0, 0 + 14 HFSDBG_IS_ALLOCATED startBlock, blockCount, stop, 0, 0 ... err, 0, actualBlockCount, 0, 0 + 15 unused + 16 HFSDBG_RESET_EXTENT_CACHE 0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 17 HFSDBG_REMOVE_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0, 0 ... 0, 0, vcbFreeExtCnt, extentsRemoved, 0 + 18 HFSDBG_ADD_EXTENT_CACHE startBlock, blockCount, vcbFreeExtCnt, 0, 0 ... 0, 0, vcbFreeExtCnt, retval, 0 + 19 HFSDBG_READ_BITMAP_RANGE startBlock, iosize, 0, 0, 0 ... err, 0, 0, 0, 0 + 20 HFSDBG_RELEASE_SCAN_BITMAP 0, 0, 0, 0, 0 ... 0, 0, 0, 0, 0 + 21 HFSDBG_SYNCER hfsmp, now, mnt_last_write_completed_timestamp, mnt_pending_write_size, 0 ... err, deadline, 0, 0, 0 + 22 HFSDBG_SYNCER_TIMED now, last_write_completed, hfs_mp->mnt_last_write_issued_timestamp, mnt_pending_write_size, 0 ... now, mnt_last_write_completed_timestamp, mnt_last_write_issued_timestamp, hfs_mp->mnt_pending_write_size, 0 + 23 HFSDBG_UNMAP_SCAN hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 + 24 HFSDBG_UNMAP_TRIM hfs_raw_dev, 0, 0, 0, 0 ... hfs_raw_dev, error, 0, 0, 0 +*/ + +#endif // HFS_KDEBUG_H_ diff --git a/core/hfs_link.c b/core/hfs_link.c new file mode 100644 index 0000000..478f519 --- /dev/null +++ b/core/hfs_link.c @@ -0,0 +1,1419 @@ +/* + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_format.h" +#include "hfs_endian.h" + + +static int cur_link_id = 0; + +/* + * Private directories where hardlink inodes reside. + */ +const char *hfs_private_names[] = { + HFSPLUSMETADATAFOLDER, /* FILE HARDLINKS */ + HFSPLUS_DIR_METADATA_FOLDER /* DIRECTORY HARDLINKS */ +}; + + +/* + * Hardlink inodes save the head of their link chain in a + * private extended attribute. The following calls are + * used to access this attribute. + */ +static int setfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t firstlink); +static int getfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t *firstlink); + +/* + * Create a new catalog link record + * + * An indirect link is a reference to an inode (the real + * file or directory record). + * + * All the indirect links for a given inode are chained + * together in a doubly linked list. + * + * Pre-Leopard file hard links do not have kHFSHasLinkChainBit + * set and do not have first/prev/next link IDs i.e. the values + * are zero. If a new link is being added to an existing + * pre-Leopard file hard link chain, do not set kHFSHasLinkChainBit. 
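+ *
+ * For illustration: if links are created for an inode in the order
+ * A, B, C, the chain is threaded newest-first, with the inode's
+ * first-link pointing at the most recently created link:
+ *
+ *	firstlink -> C -next-> B -next-> A -next-> 0
+ *
+ * and each link's prev pointer runs the opposite way (A -prev-> B, etc.).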
+ */ +static int +createindirectlink(struct hfsmount *hfsmp, u_int32_t linknum, struct cat_desc *descp, + cnid_t nextcnid, cnid_t *linkcnid, int is_inode_linkchain_set) +{ + struct FndrFileInfo *fip; + struct cat_attr attr; + + if (linknum == 0) { + printf("hfs: createindirectlink: linknum is zero!\n"); + return (EINVAL); + } + + /* Setup the default attributes */ + bzero(&attr, sizeof(attr)); + + /* Links are matched to inodes by link ID and to volumes by create date */ + attr.ca_linkref = linknum; + attr.ca_itime = hfsmp->hfs_metadata_createdate; + attr.ca_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH; + attr.ca_recflags = kHFSHasLinkChainMask | kHFSThreadExistsMask; + attr.ca_flags = UF_IMMUTABLE; + fip = (struct FndrFileInfo *)&attr.ca_finderinfo; + + if (descp->cd_flags & CD_ISDIR) { + fip->fdType = SWAP_BE32 (kHFSAliasType); + fip->fdCreator = SWAP_BE32 (kHFSAliasCreator); + fip->fdFlags = SWAP_BE16 (kIsAlias); + } else /* file */ { + fip->fdType = SWAP_BE32 (kHardLinkFileType); + fip->fdCreator = SWAP_BE32 (kHFSPlusCreator); + fip->fdFlags = SWAP_BE16 (kHasBeenInited); + /* If the file inode does not have kHFSHasLinkChainBit set + * and the next link chain ID is zero, assume that this + * is pre-Leopard file inode. Therefore clear the bit. + */ + if ((is_inode_linkchain_set == 0) && (nextcnid == 0)) { + attr.ca_recflags &= ~kHFSHasLinkChainMask; + } + } + /* Create the indirect link directly in the catalog */ + return cat_createlink(hfsmp, descp, &attr, nextcnid, linkcnid); +} + + +/* + * Make a link to the cnode cp in the directory dp + * using the name in cnp. src_vp is the vnode that + * corresponds to 'cp' which was part of the arguments to + * hfs_vnop_link. + * + * The cnodes cp and dcp must be locked. + */ +static int +hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, + struct cnode *dcp, struct componentname *cnp, vfs_context_t ctx) +{ + struct proc *p = vfs_context_proc(ctx); + u_int32_t indnodeno = 0; + char inodename[32]; + struct cat_desc to_desc; + struct cat_desc link_desc; + int newlink = 0; + int lockflags; + int retval = 0; + cat_cookie_t cookie; + cnid_t orig_cnid; + cnid_t linkcnid = 0; + cnid_t orig_firstlink; + enum privdirtype type; + + type = S_ISDIR(cp->c_mode) ? DIR_HARDLINKS : FILE_HARDLINKS; + + if (cur_link_id == 0) { + cur_link_id = ((random() & 0x3fffffff) + 100); + } + + /* We don't allow link nodes in our private system directories. */ + if (dcp->c_fileid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + dcp->c_fileid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + return (EPERM); + } + + bzero(&cookie, sizeof(cat_cookie_t)); + /* Reserve some space in the Catalog file. */ + if ((retval = cat_preflight(hfsmp, (2 * CAT_CREATE)+ CAT_RENAME, &cookie, p))) { + return (retval); + } + + lockflags = SFL_CATALOG | SFL_ATTRIBUTE; + /* Directory hard links allocate space for a symlink. */ + if (type == DIR_HARDLINKS) { + lockflags |= SFL_BITMAP; + } + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + /* Save the current cnid value so we restore it if an error occurs. */ + orig_cnid = cp->c_desc.cd_cnid; + + /* + * If this is a new hardlink then we need to create the inode + * and replace the original file/dir object with a link node. + */ + if ((cp->c_linkcount == 2) && !(cp->c_flag & C_HARDLINK)) { + newlink = 1; + bzero(&to_desc, sizeof(to_desc)); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[type].cd_cnid; + to_desc.cd_cnid = cp->c_fileid; + to_desc.cd_flags = (type == DIR_HARDLINKS) ? 
CD_ISDIR : 0; + + do { + if (type == DIR_HARDLINKS) { + /* Directory hardlinks always use the cnid. */ + indnodeno = cp->c_fileid; + MAKE_DIRINODE_NAME(inodename, sizeof(inodename), + indnodeno); + } else { + /* Get a unique indirect node number */ + if (retval == 0) { + indnodeno = cp->c_fileid; + } else { + indnodeno = cur_link_id++; + } + MAKE_INODE_NAME(inodename, sizeof(inodename), + indnodeno); + } + /* Move original file/dir to data node directory */ + to_desc.cd_nameptr = (const u_int8_t *)inodename; + to_desc.cd_namelen = strlen(inodename); + + retval = cat_rename(hfsmp, &cp->c_desc, &hfsmp->hfs_private_desc[type], + &to_desc, NULL); + + if (retval != 0 && retval != EEXIST) { + printf("hfs_makelink: cat_rename to %s failed (%d) fileid=%d, vol=%s\n", + inodename, retval, cp->c_fileid, hfsmp->vcbVN); + } + } while ((retval == EEXIST) && (type == FILE_HARDLINKS)); + if (retval) + goto out; + + /* + * Replace original file/dir with a link record. + */ + + bzero(&link_desc, sizeof(link_desc)); + link_desc.cd_nameptr = cp->c_desc.cd_nameptr; + link_desc.cd_namelen = cp->c_desc.cd_namelen; + link_desc.cd_parentcnid = cp->c_parentcnid; + link_desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0; + + retval = createindirectlink(hfsmp, indnodeno, &link_desc, 0, &linkcnid, true); + if (retval) { + int err; + + /* Restore the cnode's cnid. */ + cp->c_desc.cd_cnid = orig_cnid; + + /* Put the original file back. */ + err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); + if (err) { + if (err != EIO && err != ENXIO) + printf("hfs_makelink: error %d from cat_rename backout 1", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: createindirectlink (1) failed: %d\n", retval); + retval = EIO; + } + goto out; + } + cp->c_attr.ca_linkref = indnodeno; + cp->c_desc.cd_cnid = linkcnid; + /* Directory hard links store the first link in an attribute. */ + if (type == DIR_HARDLINKS) { + if (setfirstlink(hfsmp, cp->c_fileid, linkcnid) == 0) + cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + } else /* FILE_HARDLINKS */ { + cp->c_attr.ca_firstlink = linkcnid; + } + cp->c_attr.ca_recflags |= kHFSHasLinkChainMask; + } else { + indnodeno = cp->c_attr.ca_linkref; + } + + /* + * Create a catalog entry for the new link (parentID + name). + */ + + bzero(&link_desc, sizeof(link_desc)); + link_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + link_desc.cd_namelen = strlen(cnp->cn_nameptr); + link_desc.cd_parentcnid = dcp->c_fileid; + link_desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0; + + /* Directory hard links store the first link in an attribute. */ + if (type == DIR_HARDLINKS) { + retval = getfirstlink(hfsmp, cp->c_fileid, &orig_firstlink); + } else /* FILE_HARDLINKS */ { + orig_firstlink = cp->c_attr.ca_firstlink; + } + if (retval == 0) + retval = createindirectlink(hfsmp, indnodeno, &link_desc, + orig_firstlink, &linkcnid, + (cp->c_attr.ca_recflags & kHFSHasLinkChainMask)); + if (retval && newlink) { + int err; + + /* Get rid of new link */ + (void) cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + + /* Restore the cnode's cnid. */ + cp->c_desc.cd_cnid = orig_cnid; + + /* Put the original file back. 
*/ + err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); + if (err) { + if (err != EIO && err != ENXIO) + printf("hfs_makelink: error %d from cat_rename backout 2", err); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + + cp->c_attr.ca_linkref = 0; + + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: createindirectlink (2) failed: %d\n", retval); + retval = EIO; + } + goto out; + } else if (retval == 0) { + + /* Update the original first link to point back to the new first link. */ + if (cp->c_attr.ca_recflags & kHFSHasLinkChainMask) { + (void) cat_update_siblinglinks(hfsmp, orig_firstlink, linkcnid, HFS_IGNORABLE_LINK); + + /* Update the inode's first link value. */ + if (type == DIR_HARDLINKS) { + if (setfirstlink(hfsmp, cp->c_fileid, linkcnid) == 0) + cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + } else { + cp->c_attr.ca_firstlink = linkcnid; + } + } + /* + * Finally, if this is a new hardlink then: + * - update the private system directory + * - mark the cnode as a hard link + */ + if (newlink) { + vnode_t vp; + + hfsmp->hfs_private_attr[type].ca_entries++; + /* From application perspective, directory hard link is a + * normal directory. Therefore count the new directory + * hard link for folder count calculation. + */ + if (type == DIR_HARDLINKS) { + INC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[type]); + } + retval = cat_update(hfsmp, &hfsmp->hfs_private_desc[type], + &hfsmp->hfs_private_attr[type], NULL, NULL); + if (retval) { + if (retval != EIO && retval != ENXIO) { + printf("hfs_makelink: cat_update of privdir failed! (%d)\n", retval); + retval = EIO; + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + cp->c_flag |= C_HARDLINK; + + /* + * Now we need to mark the vnodes as being hardlinks via the vnode_setmultipath call. + * Note that we're calling vnode_get here, which should simply add an iocount if possible, without + * doing much checking. It's safe to call this because we are protected by the cnode lock, which + * ensures that anyone trying to reclaim it will block until we release it. vnode_get will usually + * give us an extra iocount, unless the vnode is about to be reclaimed (and has no iocounts). + * In that case, we'd error out, but we'd also not care if we added the VISHARDLINK bit to the vnode. + * + * As for the iocount we're about to add, we can't necessarily always call vnode_put here. + * If the one we add is the only iocount on the vnode, and there was + * sufficient vnode pressure, it could go through VNOP_INACTIVE immediately, which would + * require the cnode lock and cause us to double-lock panic. We can only call vnode_put if we know + * that the vnode we're operating on is the one with which we came into hfs_vnop_link, because + * that means VFS took an iocount on it for us. If it's *not* the one that we came into the call + * with, then mark it as NEED_VNODE_PUT to have hfs_unlock drop it for us. hfs_vnop_link will + * unlock the cnode when it is finished. + */ + if ((vp = cp->c_vp) != NULLVP) { + if (vnode_get(vp) == 0) { + vnode_setmultipath(vp); + if (vp == src_vp) { + /* we have an iocount on data fork vnode already. 
*/ + vnode_put(vp); + } + else { + cp->c_flag |= C_NEED_DVNODE_PUT; + } + } + } + if ((vp = cp->c_rsrc_vp) != NULLVP) { + if (vnode_get(vp) == 0) { + vnode_setmultipath(vp); + if (vp == src_vp) { + vnode_put(vp); + } + else { + cp->c_flag |= C_NEED_RVNODE_PUT; + } + } + } + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; + } + } +out: + hfs_systemfile_unlock(hfsmp, lockflags); + + cat_postflight(hfsmp, &cookie, p); + + if (retval == 0 && newlink) { + hfs_volupdate(hfsmp, VOL_MKFILE, 0); + } + return (retval); +} + + +/* + * link vnode operation + * + * IN vnode_t a_vp; + * IN vnode_t a_tdvp; + * IN struct componentname *a_cnp; + * IN vfs_context_t a_context; + */ +int +hfs_vnop_link(struct vnop_link_args *ap) +{ + struct hfsmount *hfsmp; + struct vnode *vp = ap->a_vp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fdvp = NULLVP; + struct componentname *cnp = ap->a_cnp; + struct cnode *cp; + struct cnode *tdcp; + struct cnode *fdcp = NULL; + struct cat_desc todesc; + cnid_t parentcnid; + int lockflags = 0; + int intrans = 0; + enum vtype v_type; + int error, ret; + + hfsmp = VTOHFS(vp); + v_type = vnode_vtype(vp); + + /* No hard links in HFS standard file systems. */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (ENOTSUP); + } + /* Linking to a special file is not permitted. */ + if (v_type == VBLK || v_type == VCHR) { + return (EPERM); + } + + /* + * For now, return ENOTSUP for a symlink target. This can happen + * for linkat(2) when called without AT_SYMLINK_FOLLOW. + */ + if (v_type == VLNK) + return (ENOTSUP); + + cp = VTOC(vp); + + if (v_type == VDIR) { +#if CONFIG_HFS_DIRLINK + /* Make sure our private directory exists. */ + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid == 0) { + return (EPERM); + } + /* + * Directory hardlinks (ADLs) have only been qualified on + * journaled HFS+. If/when they are tested on non-journaled + * file systems then this test can be removed. + */ + if (hfsmp->jnl == NULL) { + return (EPERM); + } + + /* Directory hardlinks also need the parent of the original directory. */ + if ((error = hfs_vget(hfsmp, hfs_currentparent(cp, /* have_lock: */ false), + &fdvp, 1, 0))) { + return (error); + } +#else + /* some platforms don't support directory hardlinks. */ + return EPERM; +#endif + } else { + /* Make sure our private directory exists. */ + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid == 0) { + return (ENOTSUP); + } + } + if (hfs_freeblks(hfsmp, 0) == 0) { + if (fdvp) { + vnode_put(fdvp); + } + return (ENOSPC); + } + + nspace_snapshot_event(vp, VTOC(vp)->c_ctime, NAMESPACE_HANDLER_LINK_CREATE, NULL); + + /* Lock the cnodes. */ + if (fdvp) { + if ((error = hfs_lockfour(VTOC(tdvp), VTOC(vp), VTOC(fdvp), NULL, HFS_EXCLUSIVE_LOCK, NULL))) { + if (fdvp) { + vnode_put(fdvp); + } + return (error); + } + fdcp = VTOC(fdvp); + } else { + if ((error = hfs_lockpair(VTOC(tdvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + return (error); + } + } + tdcp = VTOC(tdvp); + /* grab the parent CNID from originlist after grabbing cnode locks */ + parentcnid = hfs_currentparent(cp, /* have_lock: */ true); + + /* + * Make sure we didn't race the src or dst parent directories with rmdir. + * Note that we should only have a src parent directory cnode lock + * if we're dealing with a directory hardlink here. 
+ */ + if (fdcp) { + if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + } + + if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + /* Check the source for errors: + * too many links, immutable, race with unlink + */ + if (cp->c_linkcount >= HFS_LINK_MAX) { + error = EMLINK; + goto out; + } + if (cp->c_bsdflags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + tdcp->c_flag |= C_DIR_MODIFICATION; + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + intrans = 1; + + todesc.cd_flags = (v_type == VDIR) ? CD_ISDIR : 0; + todesc.cd_encoding = 0; + todesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + todesc.cd_namelen = cnp->cn_namelen; + todesc.cd_parentcnid = tdcp->c_fileid; + todesc.cd_hint = 0; + todesc.cd_cnid = 0; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* If destination exists then we lost a race with create. */ + if (cat_lookup(hfsmp, &todesc, 0, 0, NULL, NULL, NULL, NULL) == 0) { + error = EEXIST; + goto out; + } + if (cp->c_flag & C_HARDLINK) { + struct cat_attr cattr; + + /* If inode is missing then we lost a race with unlink. */ + if ((cat_idlookup(hfsmp, cp->c_fileid, 0, 0, NULL, &cattr, NULL) != 0) || + (cattr.ca_fileid != cp->c_fileid)) { + error = ENOENT; + goto out; + } + } else { + cnid_t fileid; + + /* If source is missing then we lost a race with unlink. */ + if ((cat_lookup(hfsmp, &cp->c_desc, 0, 0, NULL, NULL, NULL, &fileid) != 0) || + (fileid != cp->c_fileid)) { + error = ENOENT; + goto out; + } + } + /* + * All directory links must reside in an non-ARCHIVED hierarchy. + */ + if (v_type == VDIR) { + /* + * - Source parent and destination parent cannot match + * - A link is not permitted in the root directory + * - Parent of 'pointed at' directory is not the root directory + * - The 'pointed at' directory (source) is not an ancestor + * of the new directory hard link (destination). + * - No ancestor of the new directory hard link (destination) + * is a directory hard link. 
+ */ + if ((parentcnid == tdcp->c_fileid) || + (tdcp->c_fileid == kHFSRootFolderID) || + (parentcnid == kHFSRootFolderID) || + cat_check_link_ancestry(hfsmp, tdcp->c_fileid, cp->c_fileid)) { + error = EPERM; /* abide by the rules, you did not */ + goto out; + } + } + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + cp->c_linkcount++; + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; + error = hfs_makelink(hfsmp, vp, cp, tdcp, cnp, ap->a_context); + if (error) { + cp->c_linkcount--; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } else { + /* Invalidate negative cache entries in the destination directory */ + if (tdcp->c_flag & C_NEG_ENTRIES) { + cache_purge_negatives(tdvp); + tdcp->c_flag &= ~C_NEG_ENTRIES; + } + + /* Update the target directory and volume stats */ + tdcp->c_entries++; + if (v_type == VDIR) { + INC_FOLDERCOUNT(hfsmp, tdcp->c_attr); + tdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + + /* Set kHFSHasChildLinkBit in the destination hierarchy */ + error = cat_set_childlinkbit(hfsmp, tdcp->c_parentcnid); + if (error) { + printf ("hfs_vnop_link: error updating destination parent chain for id=%u, vol=%s\n", tdcp->c_cnid, hfsmp->vcbVN); + error = 0; + } + } + tdcp->c_dirchangecnt++; + tdcp->c_flag |= C_MODIFIED; + hfs_incr_gencount(tdcp); + tdcp->c_touch_chgtime = TRUE; + tdcp->c_touch_modtime = TRUE; + + error = hfs_update(tdvp, 0); + if (error) { + if (error != EIO && error != ENXIO) { + printf("hfs_vnop_link: error %d updating tdvp %p\n", error, tdvp); + error = EIO; + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + + if ((v_type == VDIR) && + (fdcp != NULL) && + ((fdcp->c_attr.ca_recflags & kHFSHasChildLinkMask) == 0)) { + + fdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + fdcp->c_flag |= C_MODIFIED; + fdcp->c_touch_chgtime = TRUE; + error = hfs_update(fdvp, 0); + if (error) { + if (error != EIO && error != ENXIO) { + printf("hfs_vnop_link: error %d updating fdvp %p\n", error, fdvp); + // No point changing error as it's set immediate below + } + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + + /* Set kHFSHasChildLinkBit in the source hierarchy */ + error = cat_set_childlinkbit(hfsmp, fdcp->c_parentcnid); + if (error) { + printf ("hfs_vnop_link: error updating source parent chain for id=%u, vol=%s\n", fdcp->c_cnid, hfsmp->vcbVN); + error = 0; + } + } + hfs_volupdate(hfsmp, VOL_MKFILE, + (tdcp->c_cnid == kHFSRootFolderID)); + } + + if (error == 0 && (ret = hfs_update(vp, 0)) != 0) { + if (ret != EIO && ret != ENXIO) + printf("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp); + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + +out: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (intrans) { + hfs_end_transaction(hfsmp); + } + + tdcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&tdcp->c_flag); + + if (fdcp) { + hfs_unlockfour(tdcp, cp, fdcp, NULL); + } else { + hfs_unlockpair(tdcp, cp); + } + if (fdvp) { + vnode_put(fdvp); + } + return (error); +} + + +/* + * Remove a link to a hardlink file/dir. + * + * Note: dvp and vp cnodes are already locked. 
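+ *
+ * If other links remain after the removal, the link chain is repaired
+ * below: the previous link's next pointer is set to nextlinkid, the next
+ * link's prev pointer is set to prevlinkid, and if the removed link was
+ * the head of the chain, the inode's first-link (ca_firstlink, or the
+ * first-link attribute for directory hard links) is advanced to nextlinkid.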
+ */ +int +hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int skip_reserve) +{ + struct cnode *cp; + struct cnode *dcp; + struct cat_desc cndesc; + struct timeval tv; + char inodename[32]; + cnid_t prevlinkid; + cnid_t nextlinkid; + int lockflags = 0; + int started_tr; + int error; + + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (EPERM); + } + cp = VTOC(vp); + dcp = VTOC(dvp); + + dcp->c_flag |= C_DIR_MODIFICATION; + + /* Remove the entry from the namei cache: */ + cache_purge(vp); + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + started_tr = 0; + goto out; + } + started_tr = 1; + + /* + * Protect against a race with rename by using the component + * name passed in and parent id from dvp (instead of using + * the cp->c_desc which may have changed). + * + * Re-lookup the component name so we get the correct cnid + * for the name (as opposed to the c_cnid in the cnode which + * could have changed before the cnode was locked). + */ + cndesc.cd_flags = vnode_isdir(vp) ? CD_ISDIR : 0; + cndesc.cd_encoding = cp->c_desc.cd_encoding; + cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + cndesc.cd_namelen = cnp->cn_namelen; + cndesc.cd_parentcnid = dcp->c_fileid; + cndesc.cd_hint = dcp->c_childhint; + + lockflags = SFL_CATALOG | SFL_ATTRIBUTE; + if (cndesc.cd_flags & CD_ISDIR) { + /* We'll be removing the alias resource allocation blocks. */ + lockflags |= SFL_BITMAP; + } + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + if ((error = cat_lookuplink(hfsmp, &cndesc, &cndesc.cd_cnid, &prevlinkid, &nextlinkid))) { + goto out; + } + + /* Reserve some space in the catalog file. */ + if (!skip_reserve && (error = cat_preflight(hfsmp, 2 * CAT_DELETE, NULL, 0))) { + goto out; + } + + /* Purge any cached origin entries for a directory or file hard link. */ + hfs_relorigin(cp, dcp->c_fileid); + if (dcp->c_fileid != dcp->c_cnid) { + hfs_relorigin(cp, dcp->c_cnid); + } + + /* Delete the link record. */ + if ((error = cat_deletelink(hfsmp, &cndesc))) { + goto out; + } + + /* Update the parent directory. */ + if (dcp->c_entries > 0) { + dcp->c_entries--; + } + if (cndesc.cd_flags & CD_ISDIR) { + DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); + } + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + microtime(&tv); + dcp->c_touch_chgtime = dcp->c_touch_modtime = true; + dcp->c_flag |= C_MODIFIED; + hfs_update(dcp->c_vp, 0); + + /* + * If this is the last link then we need to process the inode. + * Otherwise we need to fix up the link chain. + */ + --cp->c_linkcount; + if (cp->c_linkcount < 1) { + char delname[32]; + struct cat_desc to_desc; + struct cat_desc from_desc; + + /* + * If a file inode or directory inode is being deleted, rename + * it to an open deleted file. This ensures that deletion + * of inode and its corresponding extended attributes does + * not overflow the journal. This inode will be deleted + * either in hfs_vnop_inactive() or in hfs_remove_orphans(). + * Note: a rename failure here is not fatal. 
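+ *
+ * For example, an inode record named from its link reference by
+ * MAKE_INODE_NAME (e.g. "iNode1234") ends up renamed to the open-deleted
+ * name built from its file ID by MAKE_DELETED_NAME (e.g. "temp1234"),
+ * still inside the file-hardlink metadata directory.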
+ */ + bzero(&from_desc, sizeof(from_desc)); + bzero(&to_desc, sizeof(to_desc)); + if (vnode_isdir(vp)) { + if (cp->c_entries != 0) { + panic("hfs_unlink: dir not empty (id %d, %d entries)", cp->c_fileid, cp->c_entries); + } + MAKE_DIRINODE_NAME(inodename, sizeof(inodename), + cp->c_attr.ca_linkref); + from_desc.cd_parentcnid = hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid; + from_desc.cd_flags = CD_ISDIR; + to_desc.cd_flags = CD_ISDIR; + } else { + MAKE_INODE_NAME(inodename, sizeof(inodename), + cp->c_attr.ca_linkref); + from_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + from_desc.cd_flags = 0; + to_desc.cd_flags = 0; + } + from_desc.cd_nameptr = (const u_int8_t *)inodename; + from_desc.cd_namelen = strlen(inodename); + from_desc.cd_cnid = cp->c_fileid; + + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); + to_desc.cd_nameptr = (const u_int8_t *)delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_desc.cd_cnid = cp->c_fileid; + + error = cat_rename(hfsmp, &from_desc, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &to_desc, (struct cat_desc *)NULL); + if (error == 0) { + cp->c_flag |= C_DELETED; + cp->c_attr.ca_recflags &= ~kHFSHasLinkChainMask; + cp->c_attr.ca_firstlink = 0; + if (vnode_isdir(vp)) { + hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries--; + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[DIR_HARDLINKS]); + + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries++; + INC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[DIR_HARDLINKS], + &hfsmp->hfs_private_attr[DIR_HARDLINKS], NULL, NULL); + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + } + } else { + error = 0; /* rename failure here is not fatal */ + } + } else /* Still some links left */ { + cnid_t firstlink; + + /* + * Update the start of the link chain. + * Note: Directory hard links store the first link in an attribute. + */ + if (vnode_isdir(vp) && + getfirstlink(hfsmp, cp->c_fileid, &firstlink) == 0 && + firstlink == cndesc.cd_cnid) { + if (setfirstlink(hfsmp, cp->c_fileid, nextlinkid) == 0) + cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + } else if (cp->c_attr.ca_firstlink == cndesc.cd_cnid) { + cp->c_attr.ca_firstlink = nextlinkid; + } + /* Update previous link. */ + if (prevlinkid) { + (void) cat_update_siblinglinks(hfsmp, prevlinkid, HFS_IGNORABLE_LINK, nextlinkid); + } + /* Update next link. */ + if (nextlinkid) { + (void) cat_update_siblinglinks(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK); + } + } + + /* + * The call to cat_releasedesc below will only release the name + * buffer; it does not zero out the rest of the fields in the + * 'cat_desc' data structure. + * + * As a result, since there are still other links at this point, + * we need to make the current cnode descriptor point to the raw + * inode. If a path-based system call comes along first, it will + * replace the descriptor with a valid link ID. If a userland + * process already has a file descriptor open, then they will + * bypass that lookup, though. Replacing the descriptor CNID with + * the raw inode will force it to generate a new full path. + */ + cp->c_cnid = cp->c_fileid; + + /* Push new link count to disk. */ + cp->c_ctime = tv.tv_sec; + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + + /* All done with the system files. 
*/ + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + /* Update file system stats. */ + hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); + + /* + * All done with this cnode's descriptor... + * + * Note: all future catalog calls for this cnode may be + * by fileid only. This is OK for HFS (which doesn't have + * file thread records) since HFS doesn't support hard links. + */ + cat_releasedesc(&cp->c_desc); + +out: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + + return (error); +} + + +/* + * Initialize the HFS+ private system directories. + * + * These directories are used to hold the inodes + * for file and directory hardlinks as well as + * open-unlinked files. + * + * If they don't yet exist they will get created. + * + * This call is assumed to be made during mount. + */ +void +hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type) +{ + struct vnode * dvp = NULLVP; + struct cnode * dcp = NULL; + struct cat_desc *priv_descp; + struct cat_attr *priv_attrp; + struct FndrDirInfo * fndrinfo; + struct timeval tv; + int lockflags; + int trans = 0; + int error; + + if (hfsmp->hfs_flags & HFS_STANDARD) { + return; + } + + priv_descp = &hfsmp->hfs_private_desc[type]; + priv_attrp = &hfsmp->hfs_private_attr[type]; + + /* Check if directory already exists. */ + if (priv_descp->cd_cnid != 0) { + return; + } + + priv_descp->cd_parentcnid = kRootDirID; + priv_descp->cd_nameptr = (const u_int8_t *)hfs_private_names[type]; + priv_descp->cd_namelen = strlen((const char *)priv_descp->cd_nameptr); + priv_descp->cd_flags = CD_ISDIR | CD_DECOMPOSED; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(hfsmp, priv_descp, 0, 0, NULL, priv_attrp, NULL, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error == 0) { + if (type == FILE_HARDLINKS) { + hfsmp->hfs_metadata_createdate = priv_attrp->ca_itime; + } + priv_descp->cd_cnid = priv_attrp->ca_fileid; + goto exit; + } + + /* Directory is missing, if this is read-only then we're done. */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + goto exit; + } + + /* Grab the root directory so we can update it later. */ + if (hfs_vget(hfsmp, kRootDirID, &dvp, 0, 0) != 0) { + goto exit; + } + dcp = VTOC(dvp); + + /* Setup the default attributes */ + bzero(priv_attrp, sizeof(struct cat_attr)); + priv_attrp->ca_flags = UF_IMMUTABLE | UF_HIDDEN; + priv_attrp->ca_mode = S_IFDIR; + if (type == DIR_HARDLINKS) { + priv_attrp->ca_mode |= S_ISVTX | S_IRUSR | S_IXUSR | S_IRGRP | + S_IXGRP | S_IROTH | S_IXOTH; + } + priv_attrp->ca_linkcount = 1; + priv_attrp->ca_itime = hfsmp->hfs_itime; + priv_attrp->ca_recflags = kHFSHasFolderCountMask; + + fndrinfo = (struct FndrDirInfo *)&priv_attrp->ca_finderinfo; + fndrinfo->frLocation.v = SWAP_BE16(16384); + fndrinfo->frLocation.h = SWAP_BE16(16384); + fndrinfo->frFlags = SWAP_BE16(kIsInvisible + kNameLocked); + + if (hfs_start_transaction(hfsmp) != 0) { + goto exit; + } + trans = 1; + + /* Need the catalog and EA b-trees for CNID acquisition */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Make sure there's space in the Catalog file. 
*/ + if (cat_preflight(hfsmp, CAT_CREATE, NULL, 0) != 0) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto exit; + } + + /* Get the CNID for use */ + cnid_t new_id; + if ((error = cat_acquire_cnid(hfsmp, &new_id))) { + hfs_systemfile_unlock (hfsmp, lockflags); + goto exit; + } + + /* Create the private directory on disk. */ + error = cat_create(hfsmp, new_id, priv_descp, priv_attrp, NULL); + if (error == 0) { + priv_descp->cd_cnid = priv_attrp->ca_fileid; + + /* Update the parent directory */ + dcp->c_entries++; + INC_FOLDERCOUNT(hfsmp, dcp->c_attr); + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + microtime(&tv); + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + goto exit; + } + if (type == FILE_HARDLINKS) { + hfsmp->hfs_metadata_createdate = priv_attrp->ca_itime; + } + hfs_volupdate(hfsmp, VOL_MKDIR, 1); +exit: + if (trans) { + hfs_end_transaction(hfsmp); + } + if (dvp) { + hfs_unlock(dcp); + vnode_put(dvp); + } + if ((error == 0) && (type == DIR_HARDLINKS)) { + hfs_xattr_init(hfsmp); + } +} + + +/* + * Lookup a hardlink link (from chain) + */ +int +hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid, cnid_t *nextlinkid) +{ + int lockflags; + int error; + + *prevlinkid = 0; + *nextlinkid = 0; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup_siblinglinks(hfsmp, linkfileid, prevlinkid, nextlinkid); + if (error == ENOLINK) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + error = getfirstlink(hfsmp, linkfileid, nextlinkid); + } + hfs_systemfile_unlock(hfsmp, lockflags); + + return (error); +} + + +/* Find the oldest / last hardlink in the link chain */ +int +hfs_lookup_lastlink (struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *lastid, struct cat_desc *cdesc) { + int lockflags; + int error; + + *lastid = 0; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup_lastlink(hfsmp, linkfileid, lastid, cdesc); + + hfs_systemfile_unlock(hfsmp, lockflags); + + /* + * cat_lookup_lastlink will zero out the lastid/cdesc arguments as needed + * upon error cases. + */ + return error; +} + + +/* + * Cache the origin of a directory or file hard link + * + * cnode must be lock on entry + */ +void +hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) +{ + linkorigin_t *origin = NULL; + thread_t thread = current_thread(); + int count = 0; + int maxorigins = (S_ISDIR(cp->c_mode)) ? MAX_CACHED_ORIGINS : MAX_CACHED_FILE_ORIGINS; + /* + * Look for an existing origin first. If not found, create/steal one. + */ + TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { + ++count; + if (origin->lo_thread == thread) { + TAILQ_REMOVE(&cp->c_originlist, origin, lo_link); + break; + } + } + if (origin == NULL) { + /* Recycle the last (i.e., the oldest) if we have too many. 
*/ + if (count > maxorigins) { + origin = TAILQ_LAST(&cp->c_originlist, hfs_originhead); + TAILQ_REMOVE(&cp->c_originlist, origin, lo_link); + } else { + origin = hfs_malloc(sizeof(linkorigin_t)); + } + origin->lo_thread = thread; + } + origin->lo_cnid = cp->c_cnid; + origin->lo_parentcnid = parentcnid; + TAILQ_INSERT_HEAD(&cp->c_originlist, origin, lo_link); +} + +/* + * Release any cached origins for a directory or file hard link + * + * cnode must be lock on entry + */ +void +hfs_relorigins(struct cnode *cp) +{ + linkorigin_t *origin, *prev; + + TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) { + hfs_free(origin, sizeof(*origin)); + } + TAILQ_INIT(&cp->c_originlist); +} + +/* + * Release a specific origin for a directory or file hard link + * + * cnode must be lock on entry + */ +void +hfs_relorigin(struct cnode *cp, cnid_t parentcnid) +{ + linkorigin_t *origin, *prev; + thread_t thread = current_thread(); + + TAILQ_FOREACH_SAFE(origin, &cp->c_originlist, lo_link, prev) { + if (origin->lo_thread == thread) { + TAILQ_REMOVE(&cp->c_originlist, origin, lo_link); + hfs_free(origin, sizeof(*origin)); + break; + } else if (origin->lo_parentcnid == parentcnid) { + /* + * If the threads don't match, then we don't want to + * delete the entry because that might cause other threads + * to fall back and use whatever happens to be in + * c_parentcnid or the wrong link ID. By setting the + * values to zero here, it should serve as an indication + * that the path is no longer valid and that's better than + * using a random parent ID or link ID. + */ + origin->lo_parentcnid = 0; + origin->lo_cnid = 0; + } + } +} + +/* + * Test if a directory or file hard link has a cached origin + * + * cnode must be lock on entry + */ +int +hfs_haslinkorigin(cnode_t *cp) +{ + if (cp->c_flag & C_HARDLINK) { + linkorigin_t *origin; + thread_t thread = current_thread(); + + TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { + if (origin->lo_thread == thread) { + return origin->lo_cnid != 0; + } + } + } + return (0); +} + +/* + * Obtain the current parent cnid of a directory or file hard link + * + * cnode must be lock on entry + */ +cnid_t +hfs_currentparent(cnode_t *cp, bool have_lock) +{ + if (cp->c_flag & C_HARDLINK) { + if (!have_lock) + hfs_lock_always(cp, HFS_SHARED_LOCK); + + linkorigin_t *origin; + thread_t thread = current_thread(); + + TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { + if (origin->lo_thread == thread) { + if (!have_lock) + hfs_unlock(cp); + return (origin->lo_parentcnid); + } + } + + if (!have_lock) + hfs_unlock(cp); + } + return (cp->c_parentcnid); +} + +/* + * Obtain the current cnid of a directory or file hard link + * + * cnode must be lock on entry + */ +cnid_t +hfs_currentcnid(cnode_t *cp) +{ + if (cp->c_flag & C_HARDLINK) { + linkorigin_t *origin; + thread_t thread = current_thread(); + + TAILQ_FOREACH(origin, &cp->c_originlist, lo_link) { + if (origin->lo_thread == thread) { + return (origin->lo_cnid); + } + } + } + return (cp->c_cnid); +} + + +/* + * Set the first link attribute for a given file id. + * + * The attributes b-tree must already be locked. + * If journaling is enabled, a transaction must already be started. 
+ */ +static int +setfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t firstlink) +{ + FCB * btfile; + BTreeIterator * iterator; + FSBufferDescriptor btdata; + u_int8_t attrdata[FIRST_LINK_XATTR_REC_SIZE]; + HFSPlusAttrData *dataptr; + int result; + u_int16_t datasize; + + if (hfsmp->hfs_attribute_cp == NULL) { + return (EPERM); + } + iterator = hfs_mallocz(sizeof(*iterator)); + + result = hfs_buildattrkey(fileid, FIRST_LINK_XATTR_NAME, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto out; + } + dataptr = (HFSPlusAttrData *)&attrdata[0]; + dataptr->recordType = kHFSPlusAttrInlineData; + dataptr->reserved[0] = 0; + dataptr->reserved[1] = 0; + + /* + * Since attrData is variable length, we calculate the size of + * attrData by subtracting the size of all other members of + * structure HFSPlusAttData from the size of attrdata. + */ + (void)snprintf((char *)&dataptr->attrData[0], + sizeof(dataptr) - (4 * sizeof(uint32_t)), + "%lu", (unsigned long)firstlink); + dataptr->attrSize = 1 + strlen((char *)&dataptr->attrData[0]); + + /* Calculate size of record rounded up to multiple of 2 bytes. */ + datasize = sizeof(HFSPlusAttrData) - 2 + dataptr->attrSize + ((dataptr->attrSize & 1) ? 1 : 0); + + btdata.bufferAddress = dataptr; + btdata.itemSize = datasize; + btdata.itemCount = 1; + + btfile = hfsmp->hfs_attribute_cp->c_datafork; + + /* Insert the attribute. */ + result = BTInsertRecord(btfile, iterator, &btdata, datasize); + if (result == btExists) { + result = BTReplaceRecord(btfile, iterator, &btdata, datasize); + } + (void) BTFlushPath(btfile); +out: + hfs_free(iterator, sizeof(*iterator)); + + return MacToVFSError(result); +} + +/* + * Get the first link attribute for a given file id. + * + * The attributes b-tree must already be locked. + */ +static int +getfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t *firstlink) +{ + FCB * btfile; + BTreeIterator * iterator; + FSBufferDescriptor btdata; + u_int8_t attrdata[FIRST_LINK_XATTR_REC_SIZE]; + HFSPlusAttrData *dataptr; + int result; + u_int16_t datasize; + + if (hfsmp->hfs_attribute_cp == NULL) { + return (EPERM); + } + iterator = hfs_mallocz(sizeof(*iterator)); + + result = hfs_buildattrkey(fileid, FIRST_LINK_XATTR_NAME, (HFSPlusAttrKey *)&iterator->key); + if (result) + goto out; + + dataptr = (HFSPlusAttrData *)&attrdata[0]; + datasize = sizeof(attrdata); + + btdata.bufferAddress = dataptr; + btdata.itemSize = sizeof(attrdata); + btdata.itemCount = 1; + + btfile = hfsmp->hfs_attribute_cp->c_datafork; + + result = BTSearchRecord(btfile, iterator, &btdata, NULL, NULL); + if (result) + goto out; + + if (dataptr->attrSize < 3) { + result = ENOENT; + goto out; + } + *firstlink = strtoul((char*)&dataptr->attrData[0], NULL, 10); +out: + hfs_free(iterator, sizeof(*iterator)); + + return MacToVFSError(result); +} + +errno_t hfs_first_link(hfsmount_t *hfsmp, cnode_t *cp, cnid_t *link_id) +{ + errno_t error = 0; + + if (S_ISDIR(cp->c_mode)) { + int lockf = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + error = getfirstlink(hfsmp, cp->c_fileid, link_id); + + hfs_systemfile_unlock(hfsmp, lockf); + } else { + if (cp->c_attr.ca_firstlink) + *link_id = cp->c_attr.ca_firstlink; + else { + // This can happen if the cnode has been deleted + error = ENOENT; + } + } + + return error; +} diff --git a/core/hfs_lookup.c b/core/hfs_lookup.c new file mode 100644 index 0000000..943d194 --- /dev/null +++ b/core/hfs_lookup.c @@ -0,0 +1,680 @@ +/* + * Copyright (c) 1999-2015 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)hfs_lookup.c 1.0 + * derived from @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 + * + * (c) 1998-1999 Apple Inc. All Rights Reserved + * (c) 1990, 1992 NeXT Computer, Inc. All Rights Reserved + * + * + * hfs_lookup.c -- code to handle directory traversal on HFS/HFS+ volume + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" + + +/* + * FROM FREEBSD 3.1 + * Convert a component of a pathname into a pointer to a locked cnode. + * This is a very central and rather complicated routine. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * Notice that these are the only operations that can affect the directory of the target. + * + * LOCKPARENT and WANTPARENT actually refer to the parent of the last item, + * so if ISLASTCN is not set, they should be ignored. Also they are mutually exclusive, or + * WANTPARENT really implies DONTLOCKPARENT. Either of them set means that the calling + * routine wants to access the parent of the target, locked or unlocked. + * + * Keeping the parent locked as long as possible protects from other processes + * looking up the same item, so it has to be locked until the cnode is totally finished + * + * hfs_cache_lookup() performs the following for us: + * check that it is a directory + * check accessibility of directory + * check for modification attempts on read-only mounts + * if name found in cache + * if at end of path and deleting or creating + * drop it + * else + * return name. + * return hfs_lookup() + * + * Overall outline of hfs_lookup: + * + * handle simple cases of . and .. + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * cnode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ + + +/* + * Lookup *cnp in directory *dvp, return it in *vpp. + * **vpp is held on exit. + * We create a cnode for the file, but we do NOT open the file here. + +#% lookup dvp L ? ? +#% lookup vpp - L - + + IN struct vnode *dvp - Parent node of file; + INOUT struct vnode **vpp - node of target file, its a new node if + the target vnode did not exist; + IN struct componentname *cnp - Name of file; + + * When should we lock parent_hp in here ?? 
+ */ +static int +hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int *cnode_locked, int force_casesensitive_lookup) +{ + struct cnode *dcp; /* cnode for directory being searched */ + struct vnode *tvp; /* target vnode */ + struct hfsmount *hfsmp; + int flags; + int nameiop; + int retval = 0; + int isDot; + struct cat_desc desc; + struct cat_desc cndesc; + struct cat_attr attr; + struct cat_fork fork; + int lockflags; + int newvnode_flags; + + retry: + newvnode_flags = 0; + dcp = NULL; + hfsmp = VTOHFS(dvp); + *vpp = NULL; + *cnode_locked = 0; + isDot = FALSE; + tvp = NULL; + nameiop = cnp->cn_nameiop; + flags = cnp->cn_flags; + bzero(&desc, sizeof(desc)); + + /* + * First check to see if it is a . or .., else look it up. + */ + if (flags & ISDOTDOT) { /* Wanting the parent */ + cnp->cn_flags &= ~MAKEENTRY; + goto found; /* .. is always defined */ + } else if ((cnp->cn_nameptr[0] == '.') && (cnp->cn_namelen == 1)) { + isDot = TRUE; + cnp->cn_flags &= ~MAKEENTRY; + goto found; /* We always know who we are */ + } else { + if (hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + retval = ENOENT; /* The parent no longer exists ? */ + goto exit; + } + dcp = VTOC(dvp); + + if (dcp->c_flag & C_DIR_MODIFICATION) { + // This needs to be changed to sleep on c_flag using assert_wait. + // msleep((caddr_t)&dcp->c_flag, &dcp->c_rwlock, PINOD, "hfs_vnop_lookup", 0); + hfs_unlock(dcp); + tsleep((caddr_t)dvp, PRIBIO, "hfs_lookup", 1); + + goto retry; + } + + + /* + * We shouldn't need to go to the catalog if there are no children. + * However, in the face of a minor disk corruption where the valence of + * the directory is off, we could infinite loop here if we return ENOENT + * even though there are actually items in the directory. (create will + * see the ENOENT, try to create something, which will return with + * EEXIST over and over again). As a result, always check the catalog. + */ + + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + cndesc.cd_namelen = cnp->cn_namelen; + cndesc.cd_parentcnid = dcp->c_fileid; + cndesc.cd_hint = dcp->c_childhint; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + retval = cat_lookup(hfsmp, &cndesc, 0, force_casesensitive_lookup, &desc, &attr, &fork, NULL); + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (retval == 0) { + dcp->c_childhint = desc.cd_hint; + /* + * Note: We must drop the parent lock here before calling + * hfs_getnewvnode (which takes the child lock). + */ + hfs_unlock(dcp); + dcp = NULL; + + /* Verify that the item just looked up isn't one of the hidden directories. */ + if (desc.cd_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + desc.cd_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + retval = ENOENT; + goto exit; + } + + goto found; + } + + if (retval == HFS_ERESERVEDNAME) { + /* + * We found the name in the catalog, but it is unavailable + * to us. The exact error to return to our caller depends + * on the operation, and whether we've already reached the + * last path component. In all cases, avoid a negative + * cache entry, since someone else may be able to access + * the name if their lookup is configured differently. 
+ */ + + cnp->cn_flags &= ~MAKEENTRY; + + if (((flags & ISLASTCN) == 0) || ((nameiop == LOOKUP) || (nameiop == DELETE))) { + /* A reserved name for a pure lookup is the same as the path not being present */ + retval = ENOENT; + } else { + /* A reserved name with intent to create must be rejected as impossible */ + retval = EEXIST; + } + } + if (retval != ENOENT) + goto exit; + /* + * This is a non-existing entry + * + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && + !(ISSET(dcp->c_flag, C_DELETED | C_NOEXISTS))) { + retval = EJUSTRETURN; + goto exit; + } + /* + * Insert name into the name cache (as non-existent). + */ +#if CONFIG_HFS_STD + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) +#endif + { + if ((cnp->cn_flags & MAKEENTRY) && + (nameiop != CREATE)) { + cache_enter(dvp, NULL, cnp); + dcp->c_flag |= C_NEG_ENTRIES; + } + } + goto exit; + } + +found: + if (flags & ISLASTCN) { + switch(nameiop) { + case DELETE: + cnp->cn_flags &= ~MAKEENTRY; + break; + + case RENAME: + cnp->cn_flags &= ~MAKEENTRY; + if (isDot) { + retval = EISDIR; + goto exit; + } + break; + } + } + + if (isDot) { + if ((retval = vnode_get(dvp))) + goto exit; + *vpp = dvp; + } else if (flags & ISDOTDOT) { + /* + * Directory hard links can have multiple parents so + * find the appropriate parent for the current thread. + */ + if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp), + /* have_lock: */ false), &tvp, 0, 0))) { + goto exit; + } + *cnode_locked = 1; + *vpp = tvp; + } else { + int type = (attr.ca_mode & S_IFMT); + + if (!(flags & ISLASTCN) && (type != S_IFDIR) && (type != S_IFLNK)) { + retval = ENOTDIR; + goto exit; + } + /* Don't cache directory hardlink names. */ + if (attr.ca_recflags & kHFSHasLinkChainMask) { + cnp->cn_flags &= ~MAKEENTRY; + } + /* Names with composed chars are not cached. */ + if (cnp->cn_namelen != desc.cd_namelen) + cnp->cn_flags &= ~MAKEENTRY; + + retval = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, 0, &attr, &fork, &tvp, &newvnode_flags); + + if (retval) { + /* + * If this was a create/rename operation lookup, then by this point + * we expected to see the item returned from hfs_getnewvnode above. + * In the create case, it would probably eventually bubble out an EEXIST + * because the item existed when we were trying to create it. In the + * rename case, it would let us know that we need to go ahead and + * delete it as part of the rename. However, if we hit the condition below + * then it means that we found the element during cat_lookup above, but + * it is now no longer there. We simply behave as though we never found + * the element at all and return EJUSTRETURN. + */ + if ((retval == ENOENT) && + ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) && + (flags & ISLASTCN)) { + retval = EJUSTRETURN; + } + + /* + * If this was a straight lookup operation, we may need to redrive the entire + * lookup starting from cat_lookup if the element was deleted as the result of + * a rename operation. Since rename is supposed to guarantee atomicity, then + * lookups cannot fail because the underlying element is deleted as a result of + * the rename call -- either they returned the looked up element prior to rename + * or return the newer element. If we are in this region, then all we can do is add + * workarounds to guarantee the latter case. 
The element has already been deleted, so + * we just re-try the lookup to ensure the caller gets the most recent element. + */ + if ((retval == ENOENT) && (cnp->cn_nameiop == LOOKUP) && + (newvnode_flags & (GNV_CHASH_RENAMED | GNV_CAT_DELETED))) { + if (dcp) { + hfs_unlock (dcp); + } + /* get rid of any name buffers that may have lingered from the cat_lookup call */ + cat_releasedesc (&desc); + goto retry; + } + + /* Also, re-drive the lookup if the item we looked up was a hardlink, and the number + * or name of hardlinks has changed in the interim between the cat_lookup above, and + * our call to hfs_getnewvnode. hfs_getnewvnode will validate the cattr we passed it + * against what is actually in the catalog after the cnode is created. If there were + * any issues, it will bubble out ERECYCLE, which we need to swallow and use as the + * key to redrive as well. We need to special case this below because in this case, + * it needs to occur regardless of the type of lookup we're doing here. + */ + if ((retval == ERECYCLE) && (newvnode_flags & GNV_CAT_ATTRCHANGED)) { + if (dcp) { + hfs_unlock (dcp); + } + /* get rid of any name buffers that may have lingered from the cat_lookup call */ + cat_releasedesc (&desc); + retval = 0; + goto retry; + } + + /* skip to the error-handling code if we can't retry */ + goto exit; + } + + /* + * Save the origin info for file and directory hardlinks. Directory hardlinks + * need the origin for '..' lookups, and file hardlinks need it to ensure that + * competing lookups do not cause us to vend different hardlinks than the ones requested. + */ + if (ISSET(VTOC(tvp)->c_flag, C_HARDLINK)) + hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid); + *cnode_locked = 1; + *vpp = tvp; + } +exit: + if (dcp) { + hfs_unlock(dcp); + } + cat_releasedesc(&desc); + return (retval); +} + + + +/* + * Name caching works as follows: + * + * Names found by directory scans are retained in a cache + * for future reference. It is managed LRU, so frequently + * used names will hang around. Cache is indexed by hash value + * obtained from (vp, name) where vp refers to the directory + * containing name. + * + * If it is a "negative" entry, (i.e. for a name that is known NOT to + * exist) the vnode pointer will be NULL. + * + * Upon reaching the last segment of a path, if the reference + * is for DELETE, or NOCACHE is set (rewrite), and the + * name is located in the cache, it will be dropped. + * + */ + +int +hfs_vnop_lookup(struct vnop_lookup_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode *vp; + struct cnode *cp; + struct cnode *dcp; + struct hfsmount *hfsmp; + int error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + int flags = cnp->cn_flags; + struct proc *p = vfs_context_proc(ap->a_context); + int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p); + int cnode_locked; + int fastdev_candidate = 0; + int auto_candidate = 0; + + *vpp = NULL; + dcp = VTOC(dvp); + hfsmp = VTOHFS(dvp); + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSFastDevCandidateMask)) ){ + fastdev_candidate = 1; + auto_candidate = (vnode_isautocandidate(dvp) || (dcp->c_attr.ca_recflags & kHFSAutoCandidateMask)); + } + + + /* + * Lookup an entry in the cache + * + * If the lookup succeeds, the vnode is returned in *vpp, + * and a status of -1 is returned. + * + * If the lookup determines that the name does not exist + * (negative cacheing), a status of ENOENT is returned. 
+ * + * If the lookup fails, a status of zero is returned. + */ + error = cache_lookup(dvp, vpp, cnp); + if (error != -1) { + if ((error == ENOENT) && (cnp->cn_nameiop != CREATE)) + goto exit; /* found a negative cache entry */ + goto lookup; /* did not find it in the cache */ + } + /* + * We have a name that matched + * cache_lookup returns the vp with an iocount reference already taken + */ + error = 0; + vp = *vpp; + cp = VTOC(vp); + + /* We aren't allowed to vend out vp's via lookup to the hidden directory */ + if (cp->c_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + cp->c_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + /* Drop the iocount from cache_lookup */ + vnode_put (vp); + error = ENOENT; + goto exit; + } + + if (cp->c_attr.ca_recflags & kHFSDoNotFastDevPinMask) { + fastdev_candidate = 0; + } + + /* + * If this is a hard-link vnode then we need to update + * the name (of the link), the parent ID, the cnid, the + * text encoding and the catalog hint. This enables + * getattrlist calls to return the correct link info. + */ + + /* + * Alternatively, if we are forcing a case-sensitive lookup + * on a case-insensitive volume, the namecache entry + * may have been for an incorrect case. Since we cannot + * determine case vs. normalization, redrive the catalog + * lookup based on any byte mismatch. + */ + if (((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) + || (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE))) { + int stale_link = 0; + + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + if ((cp->c_parentcnid != dcp->c_cnid) || + (cnp->cn_namelen != cp->c_desc.cd_namelen) || + (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) { + struct cat_desc desc; + struct cat_attr lookup_attr; + int lockflags; + + if (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* + * Since the name in the cnode doesn't match our lookup + * string exactly, do a full lookup. + */ + hfs_unlock (cp); + + vnode_put(vp); + goto lookup; + } + + /* + * Get an updated descriptor + */ + desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = dcp->c_fileid; + desc.cd_hint = dcp->c_childhint; + desc.cd_encoding = 0; + desc.cd_cnid = 0; + desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0; + + /* + * Because lookups call replace_desc to put a new descriptor in + * the cnode we are modifying it is possible that this cnode's + * descriptor is out of date for the parent ID / name that + * we are trying to look up. (It may point to a different hardlink). + * + * We need to be cautious that when re-supplying the + * descriptor below that the results of the catalog lookup + * still point to the same raw inode for the hardlink. This would + * not be the case if we found something in the cache above but + * the vnode it returned no longer has a valid hardlink for the + * parent ID/filename combo we are requesting. (This is because + * hfs_unlink does not directly trigger namecache removal). + * + * As a result, before vending out the vnode (and replacing + * its descriptor) verify that the fileID is the same by comparing + * the in-cnode attributes vs. the one returned from the lookup call + * below. If they do not match, treat this lookup as if we never hit + * in the cache at all. 
+ */ + + lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup(VTOHFS(vp), &desc, 0, 0, &desc, &lookup_attr, NULL, NULL); + + hfs_systemfile_unlock(VTOHFS(dvp), lockflags); + + /* + * Note that cat_lookup may fail to find something with the name provided in the + * stack-based descriptor above. In that case, an ENOENT is a legitimate errno + * to be placed in error, which will get returned in the fastpath below. + */ + if (error == 0) { + if (lookup_attr.ca_fileid == cp->c_attr.ca_fileid) { + /* It still points to the right raw inode. Replacing the descriptor is fine */ + replace_desc (cp, &desc); + + /* + * Save the origin info for file and directory hardlinks. Directory hardlinks + * need the origin for '..' lookups, and file hardlinks need it to ensure that + * competing lookups do not cause us to vend different hardlinks than the ones requested. + */ + hfs_savelinkorigin(cp, dcp->c_fileid); + } + else { + /* If the fileID does not match then do NOT replace the descriptor! */ + stale_link = 1; + } + } + } + hfs_unlock (cp); + + if (stale_link) { + /* + * If we had a stale_link, then we need to pretend as though + * we never found this vnode and force a lookup through the + * traditional path. Drop the iocount acquired through + * cache_lookup above and force a cat lookup / getnewvnode + */ + vnode_put(vp); + goto lookup; + } + + if (error) { + /* + * If the cat_lookup failed then the caller will not expect + * a vnode with an iocount on it. + */ + vnode_put(vp); + } + + } + goto exit; + +lookup: + /* + * The vnode was not in the name cache or it was stale. + * + * So we need to do a real lookup. + */ + cnode_locked = 0; + + error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup); + + if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSDoNotFastDevPinMask)) { + fastdev_candidate = 0; + } + + if (*vpp && (VTOC(*vpp)->c_attr.ca_recflags & kHFSAutoCandidateMask)) { + //printf("vp %s / %d is an auto-candidate\n", (*vpp)->v_name ? (*vpp)->v_name : "no-name", VTOC(*vpp)->c_fileid); + auto_candidate = 1; + } + + if (cnode_locked) + hfs_unlock(VTOC(*vpp)); +exit: + if (*vpp && fastdev_candidate && !vnode_isfastdevicecandidate(*vpp)) { + vnode_setfastdevicecandidate(*vpp); + if (auto_candidate) { + vnode_setautocandidate(*vpp); + } + } + + /* + * check to see if we issued any I/O while completing this lookup and + * this thread/task is throttleable... if so, throttle now + * + * this allows us to throttle in between multiple meta data reads that + * might result due to looking up a long pathname (since we'll have to + * re-enter hfs_vnop_lookup for each component of the pathnam not in + * the VFS cache), instead of waiting until the entire path lookup has + * completed and throttling at the systemcall return + */ + if (__builtin_expect(throttle_lowpri_window(), 0)) + throttle_lowpri_io(1); + + return (error); +} + + diff --git a/core/hfs_macos_defs.h b/core/hfs_macos_defs.h new file mode 100644 index 0000000..029262d --- /dev/null +++ b/core/hfs_macos_defs.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __HFS_MACOS_TYPES__ +#define __HFS_MACOS_TYPES__ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE + +#include + +#include +#include +#include +#include +#include +#include + + +#define TARGET_OS_WIN32 0 +#define TARGET_OS_UNIX 0 + +#define PRAGMA_IMPORT 0 +#define PRAGMA_STRUCT_ALIGN 1 +#define PRAGMA_ONCE 0 +#define PRAGMA_STRUCT_PACK 0 +#define PRAGMA_STRUCT_PACKPUSH 0 + +#if __GNUC__ >= 2 + #define TYPE_LONGLONG 1 +#else + #define TYPE_LONGLONG 0 +#endif +#ifdef __cplusplus + #define TYPE_BOOL 1 +#else + #define TYPE_BOOL 0 +#endif + +#define EXTERN_API(_type) extern _type +#define EXTERN_API_C(_type) extern _type + +#define CALLBACK_API_C(_type, _name) _type ( * _name) + +#define TARGET_API_MACOS_X 1 +#define TARGET_API_MAC_OS8 0 +#define TARGET_API_MAC_CARBON 0 + + + +/****** START OF MACOSTYPES *********/ + + +/* + 4.4BSD's sys/types.h defines size_t without defining __size_t__: + Things are a lot clearer from here on if we define __size_t__ now. 
+ */ +#define __size_t__ + +/******************************************************************************** + + Special values in C + + NULL The C standard for an impossible pointer value + nil A carry over from pascal, NULL is prefered for C + +*********************************************************************************/ +#ifndef NULL + #define NULL 0 +#endif + +#ifndef nil + #define nil NULL +#endif + +typedef char * Ptr; +typedef long Size; + +typedef int16_t OSErr; +typedef u_int32_t ItemCount; +typedef u_int32_t ByteCount; +typedef u_int8_t * BytePtr; +typedef u_int32_t ByteOffset; + +typedef u_int16_t UniChar; +typedef unsigned char Str255[256]; +typedef unsigned char Str31[32]; +typedef unsigned char * StringPtr; +typedef const unsigned char * ConstStr255Param; +typedef const unsigned char * ConstStr31Param; +typedef const unsigned char * ConstUTF8Param; + +typedef u_int8_t Byte; + +typedef u_int32_t TextEncoding; +typedef UniChar * UniCharArrayPtr; +typedef const UniChar * ConstUniCharArrayPtr; + + +/******************************************************************************** + + Boolean types and values + + Boolean A one byte value, holds "false" (0) or "true" (1) + false The Boolean value of zero (0) + true The Boolean value of one (1) + +*********************************************************************************/ +/* + The identifiers "true" and "false" are becoming keywords in C++ + and work with the new built-in type "bool" + "Boolean" will remain an unsigned char for compatibility with source + code written before "bool" existed. +*/ +#if !TYPE_BOOL && !__bool_true_false_are_defined + +enum { + false = 0, + true = 1 +}; + +#endif /* !TYPE_BOOL */ + + +EXTERN_API( void ) DebugStr(const char * debuggerMsg); + +/********************************************************************************* + + Added types for HFSPlus MacOS X functionality. Needs to be incorporated to + other places + +*********************************************************************************/ + +typedef struct vnode* FileReference; + + +/***** START OF MACOSSTUBS ********/ + + +/* + SizeTDef.h -- Common definitions + + size_t - this type is defined by several ANSI headers. +*/ +#if ! 
defined (__size_t__) + #define __size_t__ + #if defined (__xlc) || defined (__xlC) || defined (__xlC__) || defined (__MWERKS__) + typedef unsigned long size_t; + #else /* __xlC */ + typedef unsigned int size_t; + #endif /* __xlC */ +#endif /* __size_t__ */ + + +/* + File: Errors.h + +*/ +enum { + noErr = 0, + dskFulErr = -34, /*disk full*/ + bdNamErr = -37, /*there may be no bad names in the final system!*/ + paramErr = -50, /*error in user parameter list*/ + memFullErr = -108, /*Not enough room in heap zone*/ + fileBoundsErr = -1309, /*file's EOF, offset, mark or size is too big*/ + kTECUsedFallbacksStatus = -8783, + +}; + + +enum { + /* Finder Flags */ + kHasBeenInited = 0x0100, + kHasCustomIcon = 0x0400, + kIsStationery = 0x0800, + kNameLocked = 0x1000, + kHasBundle = 0x2000, + kIsInvisible = 0x4000, + kIsAlias = 0x8000 +}; + +enum { + fsRtParID = 1, + fsRtDirID = 2 +}; + + +enum { + /* Mac OS encodings*/ + kTextEncodingMacRoman = 0L, + kTextEncodingMacJapanese = 1, + kTextEncodingMacChineseTrad = 2, + kTextEncodingMacKorean = 3, + kTextEncodingMacArabic = 4, + kTextEncodingMacHebrew = 5, + kTextEncodingMacGreek = 6, + kTextEncodingMacCyrillic = 7, + kTextEncodingMacDevanagari = 9, + kTextEncodingMacGurmukhi = 10, + kTextEncodingMacGujarati = 11, + kTextEncodingMacOriya = 12, + kTextEncodingMacBengali = 13, + kTextEncodingMacTamil = 14, + kTextEncodingMacTelugu = 15, + kTextEncodingMacKannada = 16, + kTextEncodingMacMalayalam = 17, + kTextEncodingMacSinhalese = 18, + kTextEncodingMacBurmese = 19, + kTextEncodingMacKhmer = 20, + kTextEncodingMacThai = 21, + kTextEncodingMacLaotian = 22, + kTextEncodingMacGeorgian = 23, + kTextEncodingMacArmenian = 24, + kTextEncodingMacChineseSimp = 25, + kTextEncodingMacTibetan = 26, + kTextEncodingMacMongolian = 27, + kTextEncodingMacEthiopic = 28, + kTextEncodingMacCentralEurRoman = 29, + kTextEncodingMacVietnamese = 30, + kTextEncodingMacExtArabic = 31, /* The following use script code 0, smRoman*/ + kTextEncodingMacSymbol = 33, + kTextEncodingMacDingbats = 34, + kTextEncodingMacTurkish = 35, + kTextEncodingMacCroatian = 36, + kTextEncodingMacIcelandic = 37, + kTextEncodingMacRomanian = 38, + kTextEncodingMacUnicode = 0x7E, + + kTextEncodingMacFarsi = 0x8C, /* Like MacArabic but uses Farsi digits */ /* The following use script code 7, smCyrillic */ + kTextEncodingMacUkrainian = 0x98, /* The following use script code 32, smUnimplemented */ +}; + + +/* PROTOTYPES */ + +#if DEBUG + extern void RequireFileLock(FileReference vp, int shareable); + #define REQUIRE_FILE_LOCK(vp,s) RequireFileLock((vp),(s)) +#else + #define REQUIRE_FILE_LOCK(vp,s) +#endif + + +EXTERN_API( void ) +BlockMoveData(const void * srcPtr, void * destPtr, Size byteCount); + +#define BlockMoveData(src, dest, len) bcopy((src), (dest), (len)) + +EXTERN_API_C( void ) +ClearMemory(void * start, u_int32_t length); + +#define ClearMemory(start, length) bzero((start), (size_t)(length)); + +/* + * The maximum number UTF-16 code units required to represent a HFS + * standard file name. The derivation for this number is not + * documented; it has been this value for some time. Mark, our + * resident Unicode expert, says "I'm not entirely certain, but I + * think it is the worst case for Korean Hangul conjoining jamos. The + * '15' is because a Str31 can contain at most 15 two-byte characters + * (in MacKorean encoding). Worst case, each one of those characters + * gets normalized to up to 5 UTF-16 code points. 
Each character is + * composed of up to three jamos; up to two of those jamos might not + * be in Unicode plane 0, which means they can take two UTF-16 code + * points (each) to represent. So your '5' is '2 + 2 + 1'." Sounds + * plausible! Safe to ship it, I say! + */ +#define MAX_HFS_UNICODE_CHARS (15*5) + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* __HFS_MACOS_TYPES__ */ diff --git a/core/hfs_mount.h b/core/hfs_mount.h new file mode 100644 index 0000000..abbe61a --- /dev/null +++ b/core/hfs_mount.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1997-2002 Apple Inc. 
All Rights Reserved + * + */ + +#ifndef _HFS_MOUNT_H_ +#define _HFS_MOUNT_H_ + +#include + +#include +#include + +/* + * Arguments to mount HFS-based filesystems + */ + +#define OVERRIDE_UNKNOWN_PERMISSIONS 0 + +#define UNKNOWNUID ((uid_t)99) +#define UNKNOWNGID ((gid_t)99) +#define UNKNOWNPERMISSIONS (S_IRWXU | S_IROTH | S_IXOTH) /* 705 */ + +#ifdef __APPLE_API_UNSTABLE +struct hfs_mount_args { +#ifndef KERNEL + char *fspec; /* block special device to mount */ +#endif + uid_t hfs_uid; /* uid that owns hfs files (standard HFS only) */ + gid_t hfs_gid; /* gid that owns hfs files (standard HFS only) */ + mode_t hfs_mask; /* mask to be applied for hfs perms (standard HFS only) */ + u_int32_t hfs_encoding; /* encoding for this volume (standard HFS only) */ + struct timezone hfs_timezone; /* user time zone info (standard HFS only) */ + int flags; /* mounting flags, see below */ + int journal_tbuffer_size; /* size in bytes of the journal transaction buffer */ + int journal_flags; /* flags to pass to journal_open/create */ + int journal_disable; /* don't use journaling (potentially dangerous) */ +}; + +#define HFSFSMNT_NOXONFILES 0x1 /* disable execute permissions for files */ +#define HFSFSMNT_WRAPPER 0x2 /* mount HFS wrapper (if it exists) */ +#define HFSFSMNT_EXTENDED_ARGS 0x4 /* indicates new fields after "flags" are valid */ + +/* + * Sysctl values for HFS + */ +#define HFS_ENCODINGBIAS 1 /* encoding matching CJK bias */ +#define HFS_EXTEND_FS 2 +#define HFS_ENABLE_JOURNALING 0x082969 +#define HFS_DISABLE_JOURNALING 0x031272 +#define HFS_REPLAY_JOURNAL 0x6a6e6c72 +#define HFS_ENABLE_RESIZE_DEBUG 4 /* enable debug code for volume resizing */ + +#endif /* __APPLE_API_UNSTABLE */ + +#endif /* ! _HFS_MOUNT_H_ */ diff --git a/core/hfs_notification.c b/core/hfs_notification.c new file mode 100644 index 0000000..614c32d --- /dev/null +++ b/core/hfs_notification.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2003-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_dbg.h" +#include "hfs_mount.h" +#include "hfs_quota.h" +#include "hfs_endian.h" + +#include "BTreesInternal.h" +#include "FileMgrInternal.h" + + + +void hfs_generate_volume_notifications(struct hfsmount *hfsmp) +{ + fsid_t fsid; + u_int32_t freeblks, state=999; + + /* Do not generate low disk notifications for read-only volumes */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return; + } + + fsid.val[0] = hfsmp->hfs_raw_dev; + fsid.val[1] = vfs_typenum(HFSTOVFS(hfsmp)); + + freeblks = hfs_freeblks(hfsmp, 1); + + /* + * Find the theshold the number of free blocks fits into. + * We fire upon reaching a level below desired only once, + * except for when we reach the low disk or near low disk levels + * from below, in which case we do not fire unless we have also + * reached the desired disk level (hysteresis). + * This is illustrated in the following diagram: + * + * fire ^ + * --------- desired level + * | + * + * + * | + * --------- near low disk level + * fire v + * + * + * | + * --------- low disk level + * fire v + * + * + * | ^ fire + * --------- very low disk level + * fire v | + * + */ + if (freeblks < hfsmp->hfs_freespace_notify_dangerlimit) { + state = 4; + } else if (freeblks < hfsmp->hfs_freespace_notify_warninglimit) { + state = 3; + } else if (freeblks < hfsmp->hfs_freespace_notify_nearwarninglimit) { + state = 2; + } else if (freeblks < hfsmp->hfs_freespace_notify_desiredlevel) { + /* We are between the near low disk and desired levels */ + state = 1; + } else if (freeblks >= hfsmp->hfs_freespace_notify_desiredlevel) { + state = 0; + } + + /* Free blocks are less than dangerlimit for the first time */ + if (state == 4 && !(hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK)) { + /* Dump some logging to track down intermittent issues */ + printf("hfs: set VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); + +#if HFS_SPARSE_DEV + // If we're a sparse device, dump some info about the backing store.. 
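+ // (For a sparse/backed volume the number that matters is the backing
+ // store's own free space, so its f_bavail is logged along with the
+ // VeryLowDisk event below.)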
+ hfs_lock_mount(hfsmp); + vnode_t backing_vp = hfsmp->hfs_backingvp; + if (backing_vp && vnode_get(backing_vp) != 0) + backing_vp = NULL; + hfs_unlock_mount(hfsmp); + + if (backing_vp) { + struct vfsstatfs *sfs = vfs_statfs(vnode_mount(backing_vp)); + printf("hfs: set VeryLowDisk: vol:%s, backingstore b_avail:%lld, tag:%d\n", + hfsmp->vcbVN, sfs->f_bavail, vnode_tag(backing_vp)); + vnode_put(backing_vp); + } +#endif + + hfsmp->hfs_notification_conditions |= (VQ_VERYLOWDISK|VQ_LOWDISK|VQ_NEARLOWDISK); + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else if (state == 3) { + /* Free blocks are less than warning limit for the first time */ + if (!(hfsmp->hfs_notification_conditions & VQ_LOWDISK)) { + printf("hfs: set LowDisk: vol:%s, freeblks:%d, warninglimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_warninglimit); + hfsmp->hfs_notification_conditions |= (VQ_LOWDISK|VQ_NEARLOWDISK); + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + /* Free blocks count has increased from danger limit to warning limit, so just clear VERYLOWDISK warning */ + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); + hfsmp->hfs_notification_conditions &= ~VQ_VERYLOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } + } else if (state == 2) { + /* Free blocks are less than the near warning limit for the first time */ + if (!(hfsmp->hfs_notification_conditions & VQ_NEARLOWDISK)) { + printf("hfs: set NearLowDisk: vol:%s, freeblks:%d, nearwarninglimit:%d\n", hfsmp->vcbVN, freeblks, + hfsmp->hfs_freespace_notify_nearwarninglimit); + + hfsmp->hfs_notification_conditions |= VQ_NEARLOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else { + /* Free blocks count has increased from warning/danger limit to near warning limit, + * so clear VERYLOWDISK / LOWDISK warnings, and signal if we clear VERYLOWDISK */ + hfsmp->hfs_notification_conditions &= ~VQ_LOWDISK; + if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, + hfsmp->hfs_freespace_notify_dangerlimit); + + hfsmp->hfs_notification_conditions &= ~VQ_VERYLOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } + } + } else if (state == 1) { + /* Free blocks are less than the desireable level, but more than the near warning level + * In this case, we may have to notify if we were previously underneath the danger limit */ + if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, + hfsmp->hfs_freespace_notify_dangerlimit); + + hfsmp->hfs_notification_conditions &= ~VQ_VERYLOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } + } else if (state == 0) { + /* Free blocks count has increased to desirable level, so clear all conditions */ + if (hfsmp->hfs_notification_conditions & (VQ_NEARLOWDISK|VQ_LOWDISK|VQ_VERYLOWDISK)) { + if (hfsmp->hfs_notification_conditions & VQ_NEARLOWDISK) { + printf("hfs: clear NearLowDisk: vol:%s, freeblks:%d, nearwarninglimit:%d, desiredlevel:%d\n", hfsmp->vcbVN, + freeblks, hfsmp->hfs_freespace_notify_nearwarninglimit, hfsmp->hfs_freespace_notify_desiredlevel); + } + if 
(hfsmp->hfs_notification_conditions & VQ_LOWDISK) { + printf("hfs: clear LowDisk: vol:%s, freeblks:%d, warninglimit:%d, desiredlevel:%d\n", hfsmp->vcbVN, freeblks, + hfsmp->hfs_freespace_notify_warninglimit, hfsmp->hfs_freespace_notify_desiredlevel); + } + if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_warninglimit); + } + hfsmp->hfs_notification_conditions &= ~(VQ_VERYLOWDISK|VQ_LOWDISK|VQ_NEARLOWDISK); + if (hfsmp->hfs_notification_conditions == 0) { + vfs_event_signal(&fsid, VQ_UPDATE|VQ_DESIRED_DISK, (intptr_t)NULL); + } else { + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } + } + } +} diff --git a/core/hfs_quota.c b/core/hfs_quota.c new file mode 100644 index 0000000..da47b9c --- /dev/null +++ b/core/hfs_quota.c @@ -0,0 +1,1014 @@ +/* + * Copyright (c) 2002-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1982, 1986, 1990, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hfs_quota.c + * derived from @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 + */ + +#if QUOTA + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_cnode.h" +#include "hfs_quota.h" +#include "hfs_mount.h" + + +/* + * Quota name to error message mapping. + */ +#if 0 +static char *quotatypes[] = INITQFNAMES; +#endif + +/* + * Set up the quotas for a cnode. + * + * This routine completely defines the semantics of quotas. + * If other criterion want to be used to establish quotas, the + * MAXQUOTAS value in quotas.h should be increased, and the + * additional dquots set up here. + */ +int +hfs_getinoquota(cp) + register struct cnode *cp; +{ + struct hfsmount *hfsmp; + struct vnode *vp; + int error; + int drop_usrquota = false; + + vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; + hfsmp = VTOHFS(vp); + /* + * Set up the user quota based on file uid. + * EINVAL means that quotas are not enabled. + */ + if (cp->c_dquot[USRQUOTA] == NODQUOT) { + error = dqget(cp->c_uid, &hfsmp->hfs_qfiles[USRQUOTA], USRQUOTA, &cp->c_dquot[USRQUOTA]); + if ((error != 0) && (error != EINVAL)) { + return error; + } else if (error == 0) { + drop_usrquota = true; + } + } + + /* + * Set up the group quota based on file gid. + * EINVAL means that quotas are not enabled. + */ + if (cp->c_dquot[GRPQUOTA] == NODQUOT) { + error = dqget(cp->c_gid, &hfsmp->hfs_qfiles[GRPQUOTA], GRPQUOTA, &cp->c_dquot[GRPQUOTA]); + if ((error != 0) && (error != EINVAL)) { + if (drop_usrquota == true) { + dqrele(cp->c_dquot[USRQUOTA]); + cp->c_dquot[USRQUOTA] = NODQUOT; + } + return error; + } + } + + return (0); +} + +/* + * Update disk usage, and take corrective action. + */ +int +hfs_chkdq(cp, change, cred, flags) + register struct cnode *cp; + int64_t change; + kauth_cred_t cred; + int flags; +{ + register struct dquot *dq; + register int i; + int64_t ncurbytes; + int error=0; + struct proc *p; + +#if DIAGNOSTIC + if ((flags & CHOWN) == 0) + hfs_chkdquot(cp); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + dqlock(dq); + + ncurbytes = dq->dq_curbytes + change; + if (ncurbytes >= 0) + dq->dq_curbytes = ncurbytes; + else + dq->dq_curbytes = 0; + dq->dq_flags &= ~DQ_BLKS; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + } + return (0); + } + p = current_proc(); + /* + * This use of proc_ucred() is safe because kernproc credential never + * changes. 
+ */ + if (!IS_VALID_CRED(cred)) + cred = proc_ucred(kernproc); + if (suser(cred, NULL) || proc_forcequota(p)) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + error = hfs_chkdqchg(cp, change, cred, i); + if (error) { + break; + } + } + } + if ((flags & FORCE) || error == 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + dqlock(dq); + + dq->dq_curbytes += change; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + } + } + return (error); +} + +/* + * Check for a valid change to a users allocation. + * Issue an error message and vfs event if appropriate. + */ +int +hfs_chkdqchg(cp, change, cred, type) + struct cnode *cp; + int64_t change; + kauth_cred_t cred; + int type; +{ + register struct dquot *dq = cp->c_dquot[type]; + u_int64_t ncurbytes; + struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; + + fsid_t fsid; + fsid.val[0] = VTOHFS(vp)->hfs_raw_dev; + fsid.val[1] = vfs_typenum(VTOVFS(vp)); + + dqlock(dq); + + ncurbytes = dq->dq_curbytes + change; + /* + * If user would exceed their hard limit, disallow space allocation. + */ + if (ncurbytes >= dq->dq_bhardlimit && dq->dq_bhardlimit) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + cp->c_uid == kauth_cred_getuid(cred)) { +#if 0 + printf("\nhfs: write failed, %s disk limit reached\n", + quotatypes[type]); +#endif + dq->dq_flags |= DQ_BLKS; + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + } + dqunlock(dq); + + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow space + * allocation. Reset time limit as they cross their soft limit. + */ + if (ncurbytes >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { + struct timeval tv; + + microuptime(&tv); + if (dq->dq_curbytes < dq->dq_bsoftlimit) { + dq->dq_btime = tv.tv_sec + + VTOHFS(vp)->hfs_qfiles[type].qf_btime; +#if 0 + if (cp->c_uid == kauth_cred_getuid(cred)) + printf("\nhfs: warning, %s %s\n", + quotatypes[type], "disk quota exceeded"); +#endif + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + dqunlock(dq); + + return (0); + } + if (tv.tv_sec > (time_t)dq->dq_btime) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + cp->c_uid == kauth_cred_getuid(cred)) { +#if 0 + printf("\nhfs: write failed, %s %s\n", + quotatypes[type], + "disk quota exceeded for too long"); +#endif + dq->dq_flags |= DQ_BLKS; + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + } + dqunlock(dq); + + return (EDQUOT); + } + } + dqunlock(dq); + + return (0); +} + +/* + * Check the inode limit, applying corrective action. + */ +int +hfs_chkiq(cp, change, cred, flags) + register struct cnode *cp; + int32_t change; + kauth_cred_t cred; + int flags; +{ + register struct dquot *dq; + register int i; + int ncurinodes, error=0; + struct proc *p; + +#if DIAGNOSTIC + if ((flags & CHOWN) == 0) + hfs_chkdquot(cp); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + dqlock(dq); + + ncurinodes = dq->dq_curinodes + change; + if (ncurinodes >= 0) + dq->dq_curinodes = ncurinodes; + else + dq->dq_curinodes = 0; + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + } + return (0); + } + p = current_proc(); + /* + * This use of proc_ucred() is safe because kernproc credential never + * changes. 
+ */ + if (!IS_VALID_CRED(cred)) + cred = proc_ucred(kernproc); + if (suser(cred, NULL) || proc_forcequota(p)) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + error = hfs_chkiqchg(cp, change, cred, i); + if (error) { + break; + } + } + } + if ((flags & FORCE) || error == 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = cp->c_dquot[i]) == NODQUOT) + continue; + dqlock(dq); + + dq->dq_curinodes += change; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + } + } + return (error); +} + + +/* + * Check to see if a change to a user's allocation should be permitted or not. + * Issue an error message if it should not be permitted. Return 0 if + * it should be allowed. + */ +int hfs_isiqchg_allowed(dq, hfsmp, change, cred, type, uid) + struct dquot* dq; + struct hfsmount* hfsmp; + int32_t change; + kauth_cred_t cred; + int type; + uid_t uid; +{ + u_int32_t ncurinodes; + + fsid_t fsid; + fsid.val[0] = hfsmp->hfs_raw_dev; + fsid.val[1] = vfs_typenum(HFSTOVFS(hfsmp)); + + dqlock(dq); + + ncurinodes = dq->dq_curinodes + change; + /* + * If user would exceed their hard limit, disallow cnode allocation. + */ + if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { + if ((dq->dq_flags & DQ_INODS) == 0 && + uid == kauth_cred_getuid(cred)) { + dq->dq_flags |= DQ_INODS; + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + } + dqunlock(dq); + + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow cnode + * allocation. Reset time limit as they cross their soft limit. + */ + if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + struct timeval tv; + + microuptime(&tv); + if (dq->dq_curinodes < dq->dq_isoftlimit) { + dq->dq_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + dqunlock(dq); + return (0); + } + if (tv.tv_sec > (time_t)dq->dq_itime) { + if (((dq->dq_flags & DQ_INODS) == 0) && + (uid == kauth_cred_getuid(cred))) { + dq->dq_flags |= DQ_INODS; + vfs_event_signal(&fsid, VQ_QUOTA, (intptr_t)NULL); + } + dqunlock(dq); + + return (EDQUOT); + } + } + dqunlock(dq); + + return (0); +} + + +/* + * Check for a valid change to a users allocation. + * Issue an error message if appropriate. + */ +int +hfs_chkiqchg(cp, change, cred, type) + struct cnode *cp; + int32_t change; + kauth_cred_t cred; + int type; +{ + register struct dquot *dq = cp->c_dquot[type]; + u_int32_t ncurinodes; + struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; + + dqlock(dq); + + ncurinodes = dq->dq_curinodes + change; + /* + * If user would exceed their hard limit, disallow cnode allocation. + */ + if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { + if ((dq->dq_flags & DQ_INODS) == 0 && + cp->c_uid == kauth_cred_getuid(cred)) { +#if 0 + printf("\nhfs: write failed, %s cnode limit reached\n", + quotatypes[type]); +#endif + dq->dq_flags |= DQ_INODS; + } + dqunlock(dq); + + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow cnode + * allocation. Reset time limit as they cross their soft limit. 
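+ * (The grace period applied below is hfs_qfiles[type].qf_itime seconds.)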
+ */ + if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + struct timeval tv; + + microuptime(&tv); + if (dq->dq_curinodes < dq->dq_isoftlimit) { + dq->dq_itime = tv.tv_sec + + VTOHFS(vp)->hfs_qfiles[type].qf_itime; +#if 0 + if (cp->c_uid == kauth_cred_getuid(cred)) + printf("\nhfs: warning, %s %s\n", + quotatypes[type], "cnode quota exceeded"); +#endif + dqunlock(dq); + + return (0); + } + if (tv.tv_sec > (time_t)dq->dq_itime) { + if ((dq->dq_flags & DQ_INODS) == 0 && + cp->c_uid == kauth_cred_getuid(cred)) { +#if 0 + printf("\nhfs: write failed, %s %s\n", + quotatypes[type], + "cnode quota exceeded for too long"); +#endif + dq->dq_flags |= DQ_INODS; + } + dqunlock(dq); + + return (EDQUOT); + } + } + dqunlock(dq); + + return (0); +} + +#if DIAGNOSTIC +/* + * On filesystems with quotas enabled, it is an error for a file to change + * size and not to have a dquot structure associated with it. + */ +void +hfs_chkdquot(cp) + register struct cnode *cp; +{ + struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; + struct hfsmount *hfsmp = VTOHFS(vp); + register int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP) + continue; + if (cp->c_dquot[i] == NODQUOT) { + vprint("chkdquot: missing dquot", vp); + panic("missing dquot"); + } + } +} +#endif + +/* + * Code to process quotactl commands. + */ + +/* + * Q_QUOTAON - set up a quota file for a particular file system. + */ +struct hfs_quotaon_cargs { + int error; +}; + +static int +hfs_quotaon_callback(struct vnode *vp, void *cargs) +{ + struct hfs_quotaon_cargs *args; + + args = (struct hfs_quotaon_cargs *)cargs; + + args->error = hfs_getinoquota(VTOC(vp)); + if (args->error) + return (VNODE_RETURNED_DONE); + + return (VNODE_RETURNED); +} + +int +hfs_quotaon(p, mp, type, fnamep) + struct proc *p; + struct mount *mp; + register int type; + caddr_t fnamep; +{ + struct hfsmount *hfsmp = VFSTOHFS(mp); + struct quotafile *qfp; + struct vnode *vp; + int error = 0; + struct hfs_quotaon_cargs args; + + /* Finish setting up quota structures. */ + dqhashinit(); + + qfp = &hfsmp->hfs_qfiles[type]; + + if ( (qf_get(qfp, QTF_OPENING)) ) + return (0); + + error = vnode_open(fnamep, FREAD|FWRITE, 0, 0, &vp, NULL); + if (error) { + goto out; + } + if (!vnode_isreg(vp)) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + error = EACCES; + goto out; + } + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_QUOTA)); + hfs_lock_mount (hfsmp); + hfsmp->hfs_flags |= HFS_QUOTAS; + hfs_unlock_mount (hfsmp); + vnode_setnoflush(vp); + /* + * Save the credential of the process that turned on quotas. + */ + qfp->qf_cred = kauth_cred_proc_ref(p); + qfp->qf_vp = vp; + /* + * Finish initializing the quota file + */ + error = dqfileopen(qfp, type); + if (error) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + + if (IS_VALID_CRED(qfp->qf_cred)) + kauth_cred_unref(&qfp->qf_cred); + qfp->qf_vp = NULLVP; + goto out; + } + qf_put(qfp, QTF_OPENING); + + /* + * Search vnodes associated with this mount point, + * adding references to quota file being opened. + * NB: only need to add dquot's for cnodes being modified. 
+ * + * hfs_quota_callback will be called for each vnode open for + * 'write' (VNODE_WRITEABLE) hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback + */ + args.error = 0; + + vnode_iterate(mp, VNODE_WRITEABLE | VNODE_WAIT, hfs_quotaon_callback, (void *)&args); + + error = args.error; + + if (error) { + hfs_quotaoff(p, mp, type); + } + return (error); + +out: + qf_put(qfp, QTF_OPENING); + + return (error); +} + + +/* + * Q_QUOTAOFF - turn off disk quotas for a filesystem. + */ +struct hfs_quotaoff_cargs { + int type; +}; + +static int +hfs_quotaoff_callback(struct vnode *vp, void *cargs) +{ + struct hfs_quotaoff_cargs *args; + struct cnode *cp; + struct dquot *dq; + + args = (struct hfs_quotaoff_cargs *)cargs; + + cp = VTOC(vp); + + dq = cp->c_dquot[args->type]; + cp->c_dquot[args->type] = NODQUOT; + + dqrele(dq); + + return (VNODE_RETURNED); +} + +int +hfs_quotaoff(__unused struct proc *p, struct mount *mp, register int type) +{ + struct vnode *qvp; + struct hfsmount *hfsmp = VFSTOHFS(mp); + struct quotafile *qfp; + int error; + struct hfs_quotaoff_cargs args; + + /* + * If quotas haven't been initialized, there's no work to be done. + */ + if (!dqisinitialized()) + return (0); + + qfp = &hfsmp->hfs_qfiles[type]; + + if ( (qf_get(qfp, QTF_CLOSING)) ) + return (0); + qvp = qfp->qf_vp; + + /* + * Sync out any orpaned dirty dquot entries. + */ + dqsync_orphans(qfp); + + /* + * Search vnodes associated with this mount point, + * deleting any references to quota file being closed. + * + * hfs_quotaoff_callback will be called for each vnode + * hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback + */ + args.type = type; + + vnode_iterate(mp, VNODE_WAIT, hfs_quotaoff_callback, (void *)&args); + + dqflush(qvp); + /* Finish tearing down the quota file */ + dqfileclose(qfp, type); + + vnode_clearnoflush(qvp); + error = vnode_close(qvp, FREAD|FWRITE, NULL); + + qfp->qf_vp = NULLVP; + + if (IS_VALID_CRED(qfp->qf_cred)) + kauth_cred_unref(&qfp->qf_cred); + for (type = 0; type < MAXQUOTAS; type++) + if (hfsmp->hfs_qfiles[type].qf_vp != NULLVP) + break; + if (type == MAXQUOTAS) { + vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_QUOTA)); + hfs_lock_mount (hfsmp); + hfsmp->hfs_flags &= ~HFS_QUOTAS; + hfs_unlock_mount (hfsmp); + } + + qf_put(qfp, QTF_CLOSING); + + return (error); +} + +/* + * hfs_quotacheck - checks quotas mountwide for a hypothetical situation. It probes + * the quota data structures to see if adding an inode would be allowed or not. If it + * will be allowed, the change is made. Otherwise, it reports an error back out so the + * caller will know not to proceed with inode allocation in the HFS Catalog. + * + * Note that this function ONLY tests for addition of inodes, not subtraction. 
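+ *
+ * 'change' is the number of inodes that would be added; 'uid' and 'gid'
+ * select the user and group quota records that are probed (USRQUOTA and
+ * GRPQUOTA respectively).  If an id has no quota record (dqget returns
+ * EINVAL or NODQUOT), that quota type is simply skipped.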
+ */ +int hfs_quotacheck(hfsmp, change, uid, gid, cred) + struct hfsmount *hfsmp; + int change; + uid_t uid; + gid_t gid; + kauth_cred_t cred; +{ + struct dquot *dq = NULL; + struct proc *p; + int error = 0; + int i; + id_t id = uid; + + p = current_proc(); + if (!IS_VALID_CRED(cred)) { + /* This use of proc_ucred() is safe because kernproc credential never changes */ + cred = proc_ucred(kernproc); + } + + if (suser(cred, NULL) || proc_forcequota(p)) { + for (i = 0; i < MAXQUOTAS; i++) { + /* Select if user or group id should be used */ + if (i == USRQUOTA) + id = uid; + else if (i == GRPQUOTA) + id = gid; + + error = dqget(id, &hfsmp->hfs_qfiles[i], i, &dq); + if (error && (error != EINVAL)) + break; + + error = 0; + if (dq == NODQUOT) + continue; + + /* Check quota information */ + error = hfs_isiqchg_allowed(dq, hfsmp, change, cred, i, id); + if (error) { + dqrele(dq); + break; + } + + dqlock(dq); + /* Update quota information */ + dq->dq_curinodes += change; + dqunlock(dq); + dqrele(dq); + } + } + + return error; +} + + +/* + * Q_GETQUOTA - return current values in a dqblk structure. + */ +int +hfs_getquota(mp, id, type, datap) + struct mount *mp; + u_int32_t id; + int type; + caddr_t datap; +{ + struct dquot *dq; + int error; + + error = dqget(id, &VFSTOHFS(mp)->hfs_qfiles[type], type, &dq); + if (error) + return (error); + dqlock(dq); + + bcopy(&dq->dq_dqb, datap, sizeof(dq->dq_dqb)); + + dqunlock(dq); + dqrele(dq); + + return (error); +} + +/* + * Q_SETQUOTA - assign an entire dqblk structure. + */ +int +hfs_setquota(mp, id, type, datap) + struct mount *mp; + u_int32_t id; + int type; + caddr_t datap; +{ + struct dquot *dq; + struct hfsmount *hfsmp = VFSTOHFS(mp); + struct dqblk * newlimp = (struct dqblk *) datap; + struct timeval tv; + int error; + + error = dqget(id, &hfsmp->hfs_qfiles[type], type, &dq); + if (error) + return (error); + dqlock(dq); + + /* + * Copy all but the current values. + * Reset time limit if previously had no soft limit or were + * under it, but now have a soft limit and are over it. + */ + newlimp->dqb_curbytes = dq->dq_curbytes; + newlimp->dqb_curinodes = dq->dq_curinodes; + if (dq->dq_id != 0) { + newlimp->dqb_btime = dq->dq_btime; + newlimp->dqb_itime = dq->dq_itime; + } + if (newlimp->dqb_bsoftlimit && + dq->dq_curbytes >= newlimp->dqb_bsoftlimit && + (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) { + microuptime(&tv); + newlimp->dqb_btime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; + } + if (newlimp->dqb_isoftlimit && + dq->dq_curinodes >= newlimp->dqb_isoftlimit && + (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) { + microuptime(&tv); + newlimp->dqb_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + } + bcopy(newlimp, &dq->dq_dqb, sizeof(dq->dq_dqb)); + if (dq->dq_curbytes < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && + dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) + dq->dq_flags |= DQ_FAKE; + else + dq->dq_flags &= ~DQ_FAKE; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + dqrele(dq); + + return (0); +} + +/* + * Q_SETUSE - set current cnode and byte usage. 
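+ *
+ * 'datap' points to a struct dqblk supplying the new dqb_curbytes and
+ * dqb_curinodes values; the soft-limit grace timers are restarted when
+ * the new usage crosses a soft limit that was not exceeded before.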
+ */ +int +hfs_setuse(mp, id, type, datap) + struct mount *mp; + u_int32_t id; + int type; + caddr_t datap; +{ + struct hfsmount *hfsmp = VFSTOHFS(mp); + struct dquot *dq; + struct timeval tv; + int error; + struct dqblk *quotablkp = (struct dqblk *) datap; + + error = dqget(id, &hfsmp->hfs_qfiles[type], type, &dq); + if (error) + return (error); + dqlock(dq); + + /* + * Reset time limit if have a soft limit and were + * previously under it, but are now over it. + */ + if (dq->dq_bsoftlimit && dq->dq_curbytes < dq->dq_bsoftlimit && + quotablkp->dqb_curbytes >= dq->dq_bsoftlimit) { + microuptime(&tv); + dq->dq_btime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; + } + if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && + quotablkp->dqb_curinodes >= dq->dq_isoftlimit) { + microuptime(&tv); + dq->dq_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + } + dq->dq_curbytes = quotablkp->dqb_curbytes; + dq->dq_curinodes = quotablkp->dqb_curinodes; + if (dq->dq_curbytes < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + + dqunlock(dq); + dqrele(dq); + + return (0); +} + + +/* + * Q_SYNC - sync quota files to disk. + */ +static int +hfs_qsync_callback(struct vnode *vp, __unused void *cargs) +{ + struct cnode *cp; + struct dquot *dq; + int i; + + cp = VTOC(vp); + + for (i = 0; i < MAXQUOTAS; i++) { + dq = cp->c_dquot[i]; + if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) + dqsync(dq); + } + return (VNODE_RETURNED); +} + +int +hfs_qsync(mp) + struct mount *mp; +{ + struct hfsmount *hfsmp = VFSTOHFS(mp); + int i; + + if (!dqisinitialized()) + return (0); + + /* + * Check if the mount point has any quotas. + * If not, simply return. + */ + for (i = 0; i < MAXQUOTAS; i++) + if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP) + break; + if (i == MAXQUOTAS) + return (0); + + /* + * Sync out any orpaned dirty dquot entries. + */ + for (i = 0; i < MAXQUOTAS; i++) + if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP) + dqsync_orphans(&hfsmp->hfs_qfiles[i]); + + /* + * Search vnodes associated with this mount point, + * synchronizing any modified dquot structures. + * + * hfs_qsync_callback will be called for each vnode + * hung off of this mount point + * the vnode will be + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mp, 0, hfs_qsync_callback, (void *)NULL); + + return (0); +} + +/* + * Q_QUOTASTAT - get quota on/off status + */ +int +hfs_quotastat(mp, type, datap) + struct mount *mp; + register int type; + caddr_t datap; +{ + struct hfsmount *hfsmp = VFSTOHFS(mp); + int error = 0; + int qstat; + + if ((((unsigned int)vfs_flags(mp)) & MNT_QUOTA) && (hfsmp->hfs_qfiles[type].qf_vp != NULLVP)) + qstat = 1; /* quotas are on for this type */ + else + qstat = 0; /* quotas are off for this type */ + + *((int *)datap) = qstat; + return (error); +} + +#endif // QUOTA diff --git a/core/hfs_quota.h b/core/hfs_quota.h new file mode 100644 index 0000000..27ee6a5 --- /dev/null +++ b/core/hfs_quota.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2002 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)hfs_quota.h + * derived from @(#)quota.h 8.3 (Berkeley) 8/19/94 + */ + +#ifndef _HFS_QUOTA_H_ +#define _HFS_QUOTA_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include + +#include + +struct cnode; +struct mount; +struct proc; +#ifndef _KAUTH_CRED_T +#define _KAUTH_CRED_T +struct ucred; +typedef struct ucred *kauth_cred_t; +#endif /* !_KAUTH_CRED_T */ +__BEGIN_DECLS +int hfs_chkdq(struct cnode *, int64_t, kauth_cred_t, int); +int hfs_chkdqchg(struct cnode *, int64_t, kauth_cred_t, int); +int hfs_chkiq(struct cnode *, int32_t, kauth_cred_t, int); +int hfs_chkiqchg(struct cnode *, int32_t, kauth_cred_t, int); +int hfs_getinoquota(struct cnode *); +int hfs_getquota(struct mount *, u_int32_t, int, caddr_t); +int hfs_qsync(struct mount *mp); +int hfs_quotaoff(struct proc *, struct mount *, int); +int hfs_quotaon(struct proc *, struct mount *, int, caddr_t); +int hfs_quotastat(struct mount *, int, caddr_t); +int hfs_setquota(struct mount *, u_int32_t, int, caddr_t); +int hfs_setuse(struct mount *, u_int32_t, int, caddr_t); +int hfs_isiqchg_allowed(struct dquot *, struct hfsmount *, int32_t, kauth_cred_t, int, uid_t); +int hfs_quotacheck (struct hfsmount *, int , uid_t, gid_t, kauth_cred_t); +__END_DECLS + +#if DIAGNOSTIC +__BEGIN_DECLS +void hfs_chkdquot(struct cnode *); +__END_DECLS +#endif +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ + +#endif /* ! _HFS_QUOTA_H_ */ diff --git a/core/hfs_readwrite.c b/core/hfs_readwrite.c new file mode 100644 index 0000000..99092aa --- /dev/null +++ b/core/hfs_readwrite.c @@ -0,0 +1,5876 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* @(#)hfs_readwrite.c 1.0 + * + * (c) 1998-2001 Apple Inc. All Rights Reserved + * + * hfs_readwrite.c -- vnode operations to deal with reading and writing files. 
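+ * This includes hfs_vnop_read and hfs_vnop_write as well as the
+ * bulk-access fcntl helpers and hfs_vnop_ioctl defined further below.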
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +#include + +#include + +#include "hfs.h" +#include "hfs_attrlist.h" +#include "hfs_endian.h" +#include "hfs_fsctl.h" +#include "hfs_quota.h" +#include "FileMgrInternal.h" +#include "BTreesInternal.h" +#include "hfs_cnode.h" +#include "hfs_dbg.h" + +#if HFS_CONFIG_KEY_ROLL +#include "hfs_key_roll.h" +#endif + +#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2))) + +enum { + MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */ +}; + +/* from bsd/hfs/hfs_vfsops.c */ +extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); + +/* from hfs_hotfiles.c */ +extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid, + uint8_t forktype, uint32_t *pinned); + +static int hfs_clonefile(struct vnode *, int, int, int); +static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); +static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); + + +/* + * Read data from a file. + */ +int +hfs_vnop_read(struct vnop_read_args *ap) +{ + /* + struct vnop_read_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; + }; + */ + + uio_t uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + off_t filesize; + off_t filebytes; + off_t start_resid = uio_resid(uio); + off_t offset = uio_offset(uio); + int retval = 0; + int took_truncate_lock = 0; + int io_throttle = 0; + int throttled_count = 0; + + /* Preflight checks */ + if (!vnode_isreg(vp)) { + /* can only read regular files */ + if (vnode_isdir(vp)) + return (EISDIR); + else + return (EPERM); + } + if (start_resid == 0) + return (0); /* Nothing left to do */ + if (offset < 0) + return (EINVAL); /* cant read from a negative offset */ + +#if SECURE_KERNEL + if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } +#endif + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ + return 0; + } + /* otherwise read the resource fork normally */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); + if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } + if (compressed) { + if (retval == 0) { + /* successful read, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + // + // compressed files are not traditional hot file candidates + // but they may be for CF (which ignores the ff_bytesread + // field) + // + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return retval; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + retval = 0; + } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + int error; + + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error) { + return error; + } + + } + } +#endif /* 
HFS_COMPRESSION */ + + cp = VTOC(vp); + fp = VTOF(vp); + hfsmp = VTOHFS(vp); + +#if CONFIG_PROTECT + if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) { + goto exit; + } + +#if HFS_CONFIG_KEY_ROLL + if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) { + off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid, + VNODE_IS_RSRC(vp)); + + retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc); + if (retval) + goto exit; + } +#endif // HFS_CONFIG_KEY_ROLL +#endif // CONFIG_PROTECT + + /* + * If this read request originated from a syscall (as opposed to + * an in-kernel page fault or something), then set it up for + * throttle checks + */ + if (ap->a_ioflag & IO_SYSCALL_DISPATCH) { + io_throttle = IO_RETURN_ON_THROTTLE; + } + +read_again: + + /* Protect against a size change. */ + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + filesize = fp->ff_size; + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + + /* + * Check the file size. Note that per POSIX spec, we return 0 at + * file EOF, so attempting a read at an offset that is too big + * should just return 0 on HFS+. Since the return value was initialized + * to 0 above, we just jump to exit. HFS Standard has its own behavior. + */ + if (offset > filesize) { +#if CONFIG_HFS_STD + if ((hfsmp->hfs_flags & HFS_STANDARD) && + (offset > (off_t)MAXHFSFILESIZE)) { + retval = EFBIG; + } +#endif + goto exit; + } + + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START, + (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); + + retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle); + + cp->c_touch_acctime = TRUE; + + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END, + (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); + + /* + * Keep track blocks read + */ + if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) { + int took_cnode_lock = 0; + off_t bytesread; + + bytesread = start_resid - uio_resid(uio); + + /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ + if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + took_cnode_lock = 1; + } + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < hfsmp->hfc_timebase) { + struct timeval tv; + + fp->ff_bytesread = bytesread; + microtime(&tv); + cp->c_atime = tv.tv_sec; + } else { + fp->ff_bytesread += bytesread; + } + + if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) { + // + // We don't add hotfiles for processes doing IO_EVTONLY I/O + // on the assumption that they're system processes such as + // mdworker which scan everything in the system (and thus + // do not represent user-initiated access to files) + // + (void) hfs_addhotfile(vp); + } + if (took_cnode_lock) + hfs_unlock(cp); + } +exit: + if (took_truncate_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + if (retval == EAGAIN) { + throttle_lowpri_io(1); + throttled_count++; + + retval = 0; + goto read_again; + } + if (throttled_count) + throttle_info_reset_window(NULL); + return (retval); +} + +/* + * Ideally, this wouldn't be necessary; the cluster code should be + * able to handle this on the read-side. See . 
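+ *
+ * If the current EOF does not sit on a page boundary and the write will
+ * extend the file, zero the range from the EOF up to the end of the EOF
+ * page (or zero_up_to, whichever is smaller) with a zero-fill
+ * cluster_write, so stale data in the EOF page is never exposed.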
+ */ +static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to) +{ + hfs_assert(VTOC(vp)->c_lockowner != current_thread()); + hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread()); + + struct filefork *fp = VTOF(vp); + + if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) { + // Nothing to do + return 0; + } + + zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size)); + + /* N.B. At present, @zero_up_to is not important because the cluster + code will always zero up to the end of the page anyway. */ + return cluster_write(vp, NULL, fp->ff_size, zero_up_to, + fp->ff_size, 0, IO_HEADZEROFILL); +} + +/* + * Write data to a file. + */ +int +hfs_vnop_write(struct vnop_write_args *ap) +{ + uio_t uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + kauth_cred_t cred = NULL; + off_t origFileSize; + off_t writelimit; + off_t bytesToAdd = 0; + off_t actualBytesAdded; + off_t filebytes; + off_t offset; + ssize_t resid; + int eflags; + int ioflag = ap->a_ioflag; + int retval = 0; + int lockflags; + int cnode_locked = 0; + int partialwrite = 0; + int do_snapshot = 1; + time_t orig_ctime=VTOC(vp)->c_ctime; + int took_truncate_lock = 0; + int io_return_on_throttle = 0; + int throttled_count = 0; + +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return EACCES; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow writes but do not + bother with snapshots or else we will deadlock. + */ + do_snapshot = 0; + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + int error; + + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP); + if (error != 0) { + return error; + } + } + + if (do_snapshot) { + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio); + } + +#endif + +#if SECURE_KERNEL + if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } +#endif + + resid = uio_resid(uio); + offset = uio_offset(uio); + + if (offset < 0) + return (EINVAL); + if (resid == 0) + return (E_NONE); + if (!vnode_isreg(vp)) + return (EPERM); /* Can only write regular files */ + + cp = VTOC(vp); + fp = VTOF(vp); + hfsmp = VTOHFS(vp); + +#if CONFIG_PROTECT + if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) { + goto exit; + } +#endif + + eflags = kEFDeferMask; /* defer file block allocations */ +#if HFS_SPARSE_DEV + /* + * When the underlying device is sparse and space + * is low (< 8MB), stop doing delayed allocations + * and begin doing synchronous I/O. + */ + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + (hfs_freeblks(hfsmp, 0) < 2048)) { + eflags &= ~kEFDeferMask; + ioflag |= IO_SYNC; + } +#endif /* HFS_SPARSE_DEV */ + + if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) == + (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) { + io_return_on_throttle = IO_RETURN_ON_THROTTLE; + } + +again: + /* + * Protect against a size change. + * + * Note: If took_truncate_lock is true, then we previously got the lock shared + * but needed to upgrade to exclusive. So try getting it exclusive from the + * start. 
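+ *
+ * IO_APPEND writes take the truncate lock exclusive from the start,
+ * since an append with a non-zero resid is guaranteed to extend the file.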
+ */ + if (ioflag & IO_APPEND || took_truncate_lock) { + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + else { + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + } + took_truncate_lock = 1; + + /* Update UIO */ + if (ioflag & IO_APPEND) { + uio_setoffset(uio, fp->ff_size); + offset = fp->ff_size; + } + if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) { + retval = EPERM; + goto exit; + } + + cred = vfs_context_ucred(ap->a_context); + if (cred && suser(cred, NULL) != 0) + eflags |= kEFReserveMask; + + origFileSize = fp->ff_size; + writelimit = offset + resid; + + /* + * We may need an exclusive truncate lock for several reasons, all + * of which are because we may be writing to a (portion of a) block + * for the first time, and we need to make sure no readers see the + * prior, uninitialized contents of the block. The cases are: + * + * 1. We have unallocated (delayed allocation) blocks. We may be + * allocating new blocks to the file and writing to them. + * (A more precise check would be whether the range we're writing + * to contains delayed allocation blocks.) + * 2. We need to extend the file. The bytes between the old EOF + * and the new EOF are not yet initialized. This is important + * even if we're not allocating new blocks to the file. If the + * old EOF and new EOF are in the same block, we still need to + * protect that range of bytes until they are written for the + * first time. + * + * If we had a shared lock with the above cases, we need to try to upgrade + * to an exclusive lock. If the upgrade fails, we will lose the shared + * lock, and will need to take the truncate lock again; the took_truncate_lock + * flag will still be set, causing us to try for an exclusive lock next time. + */ + if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && + ((fp->ff_unallocblocks != 0) || + (writelimit > origFileSize))) { + if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { + /* + * Lock upgrade failed and we lost our shared lock, try again. + * Note: we do not set took_truncate_lock=0 here. Leaving it + * set to 1 will cause us to try to get the lock exclusive. + */ + goto again; + } + else { + /* Store the owner in the c_truncatelockowner field if we successfully upgrade */ + cp->c_truncatelockowner = current_thread(); + } + } + + if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + goto exit; + } + cnode_locked = 1; + + filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize); + + if (offset > filebytes + && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)), + hfsmp->blockSize) < offset - filebytes)) { + retval = ENOSPC; + goto exit; + } + + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START, + (int)offset, uio_resid(uio), (int)fp->ff_size, + (int)filebytes, 0); + + /* Check if we do not need to extend the file */ + if (writelimit <= filebytes) { + goto sizeok; + } + + bytesToAdd = writelimit - filebytes; + +#if QUOTA + retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)), + cred, 0); + if (retval) + goto exit; +#endif /* QUOTA */ + + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto exit; + } + + while (writelimit > filebytes) { + bytesToAdd = writelimit - filebytes; + + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + /* Files that are changing size are not hot file candidates. 
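+	   (ff_bytesread feeds the hot-file temperature calculation, so it
+	    is reset while the file is being grown)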
*/ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd, + 0, eflags, &actualBytesAdded)); + + hfs_systemfile_unlock(hfsmp, lockflags); + + if ((actualBytesAdded == 0) && (retval == E_NONE)) + retval = ENOSPC; + if (retval != E_NONE) + break; + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE, + (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); + } + (void) hfs_update(vp, 0); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + (void) hfs_end_transaction(hfsmp); + + /* + * If we didn't grow the file enough try a partial write. + * POSIX expects this behavior. + */ + if ((retval == ENOSPC) && (filebytes > offset)) { + retval = 0; + partialwrite = 1; + uio_setresid(uio, (uio_resid(uio) - bytesToAdd)); + resid -= bytesToAdd; + writelimit = filebytes; + } +sizeok: + if (retval == E_NONE) { + off_t filesize; + off_t head_off; + int lflag; + + if (writelimit > fp->ff_size) { + filesize = writelimit; + struct timeval tv; + rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges); + microuptime(&tv); + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; + } else + filesize = fp->ff_size; + + lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY); + + /* + * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except + * for one case below). For the regions that lie before the + * beginning and after the end of this write that are in the + * same page, we let the cluster code handle zeroing that out + * if necessary. If those areas are not cached, the cluster + * code will try and read those areas in, and in the case + * where those regions have never been written to, + * hfs_vnop_blockmap will consult the invalid ranges and then + * indicate that. The cluster code will zero out those areas. + */ + + head_off = trunc_page_64(offset); + + if (head_off < offset && head_off >= fp->ff_size) { + /* + * The first page is beyond current EOF, so as an + * optimisation, we can pass IO_HEADZEROFILL. + */ + lflag |= IO_HEADZEROFILL; + } + + hfs_unlock(cp); + cnode_locked = 0; + + /* + * We need to tell UBC the fork's new size BEFORE calling + * cluster_write, in case any of the new pages need to be + * paged out before cluster_write completes (which does happen + * in embedded systems due to extreme memory pressure). + * Similarly, we need to tell hfs_vnop_pageout what the new EOF + * will be, so that it can pass that on to cluster_pageout, and + * allow those pageouts. + * + * We don't update ff_size yet since we don't want pageins to + * be able to see uninitialized data between the old and new + * EOF, until cluster_write has completed and initialized that + * part of the file. + * + * The vnode pager relies on the file size last given to UBC via + * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or + * ff_size (whichever is larger). NOTE: ff_new_size is always + * zero, unless we are extending the file via write. 
+ */ + if (filesize > fp->ff_size) { + retval = hfs_zero_eof_page(vp, offset); + if (retval) + goto exit; + fp->ff_new_size = filesize; + ubc_setsize(vp, filesize); + } + retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off, + 0, lflag | IO_NOZERODIRTY | io_return_on_throttle); + if (retval) { + fp->ff_new_size = 0; /* no longer extending; use ff_size */ + + if (retval == EAGAIN) { + /* + * EAGAIN indicates that we still have I/O to do, but + * that we now need to be throttled + */ + if (resid != uio_resid(uio)) { + /* + * did manage to do some I/O before returning EAGAIN + */ + resid = uio_resid(uio); + offset = uio_offset(uio); + + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); + } + if (filesize > fp->ff_size) { + /* + * we called ubc_setsize before the call to + * cluster_write... since we only partially + * completed the I/O, we need to + * re-adjust our idea of the filesize based + * on our interim EOF + */ + ubc_setsize(vp, offset); + + fp->ff_size = offset; + } + goto exit; + } + if (filesize > origFileSize) { + ubc_setsize(vp, origFileSize); + } + goto ioerr_exit; + } + + if (filesize > origFileSize) { + fp->ff_size = filesize; + + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + } + fp->ff_new_size = 0; /* ff_size now has the correct size */ + } + if (partialwrite) { + uio_setresid(uio, (uio_resid(uio) + bytesToAdd)); + resid += bytesToAdd; + } + + if (vnode_should_flush_after_write(vp, ioflag)) + hfs_flush(hfsmp, HFS_FLUSH_CACHE); + +ioerr_exit: + if (!cnode_locked) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + cnode_locked = 1; + } + + if (resid > uio_resid(uio)) { + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); + + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if (cp->c_mode & (S_ISUID | S_ISGID)) { + cred = vfs_context_ucred(ap->a_context); + if (cred && suser(cred, NULL)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + } + } + } + if (retval) { + if (ioflag & IO_UNIT) { + (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, + 0, ap->a_context); + uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); + uio_setresid(uio, resid); + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + } + } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) + retval = hfs_update(vp, 0); + + /* Updating vcbWrCnt doesn't need to be atomic. 
*/ + hfsmp->vcbWrCnt++; + + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END, + (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); +exit: + if (retval && took_truncate_lock + && cp->c_truncatelockowner == current_thread()) { + fp->ff_new_size = 0; + rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges); + } + + if (cnode_locked) + hfs_unlock(cp); + + if (took_truncate_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + if (retval == EAGAIN) { + throttle_lowpri_io(1); + throttled_count++; + + retval = 0; + goto again; + } + if (throttled_count) + throttle_info_reset_window(NULL); + return (retval); +} + +/* support for the "bulk-access" fcntl */ + +#define CACHE_LEVELS 16 +#define NUM_CACHE_ENTRIES (64*16) +#define PARENT_IDS_FLAG 0x100 + +struct access_cache { + int numcached; + int cachehits; /* these two for statistics gathering */ + int lookups; + unsigned int *acache; + unsigned char *haveaccess; +}; + +struct access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + int *file_ids; /* IN: array of file ids */ + gid_t *groups; /* IN: array of groups */ + short *access; /* OUT: access info for each file (0 for 'has access') */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user32_addr_t file_ids; /* IN: array of file ids */ + user32_addr_t groups; /* IN: array of groups */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ +}; + +struct user64_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t groups; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ +}; + + +// these are the "extended" versions of the above structures +// note that it is crucial that they be different sized than +// the regular version +struct ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + uint32_t *file_ids; /* IN: Array of file ids */ + char *bitmap; /* OUT: hash-bitmap of interesting directory ids */ + short *access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + cnid_t *parents; /* future use */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. 
R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user32_addr_t file_ids; /* IN: Array of file ids */ + user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + user32_addr_t parents; /* future use */ +}; + +struct user64_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t bitmap; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents;/* future use */ + user64_addr_t parents;/* future use */ +}; + + +/* + * Perform a binary search for the given parent_id. Return value is + * the index if there is a match. If no_match_indexp is non-NULL it + * will be assigned with the index to insert the item (even if it was + * not found). + */ +static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp) +{ + int index=-1; + unsigned int lo=0; + + do { + unsigned int mid = ((hi - lo)/2) + lo; + unsigned int this_id = array[mid]; + + if (parent_id == this_id) { + hi = mid; + break; + } + + if (parent_id < this_id) { + hi = mid; + continue; + } + + if (parent_id > this_id) { + lo = mid + 1; + continue; + } + } while(lo < hi); + + /* check if lo and hi converged on the match */ + if (parent_id == array[hi]) { + index = hi; + } + + if (no_match_indexp) { + *no_match_indexp = hi; + } + + return index; +} + + +static int +lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) +{ + unsigned int hi; + int matches = 0; + int index, no_match_index; + + if (cache->numcached == 0) { + *indexp = 0; + return 0; // table is empty, so insert at index=0 and report no match + } + + if (cache->numcached > NUM_CACHE_ENTRIES) { + cache->numcached = NUM_CACHE_ENTRIES; + } + + hi = cache->numcached - 1; + + index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index); + + /* if no existing entry found, find index for new one */ + if (index == -1) { + index = no_match_index; + matches = 0; + } else { + matches = 1; + } + + *indexp = index; + return matches; +} + +/* + * Add a node to the access_cache at the given index (or do a lookup first + * to find the index if -1 is passed in). We currently do a replace rather + * than an insert if the cache is full. + */ +static void +add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) +{ + int lookup_index = -1; + + /* need to do a lookup first if -1 passed for index */ + if (index == -1) { + if (lookup_bucket(cache, &lookup_index, nodeID)) { + if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) { + // only update an entry if the previous access was ESRCH (i.e. 
a scope checking error) + cache->haveaccess[lookup_index] = access; + } + + /* mission accomplished */ + return; + } else { + index = lookup_index; + } + + } + + /* if the cache is full, do a replace rather than an insert */ + if (cache->numcached >= NUM_CACHE_ENTRIES) { + cache->numcached = NUM_CACHE_ENTRIES-1; + + if (index > cache->numcached) { + index = cache->numcached; + } + } + + if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) { + index++; + } + + if (index >= 0 && index < cache->numcached) { + /* only do bcopy if we're inserting */ + bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) ); + bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) ); + } + + cache->acache[index] = nodeID; + cache->haveaccess[index] = access; + cache->numcached++; +} + + +struct cinfo { + uid_t uid; + gid_t gid; + mode_t mode; + cnid_t parentcnid; + u_int16_t recflags; +}; + +static int +snoop_callback(const cnode_t *cp, void *arg) +{ + struct cinfo *cip = arg; + + cip->uid = cp->c_uid; + cip->gid = cp->c_gid; + cip->mode = cp->c_mode; + cip->parentcnid = cp->c_parentcnid; + cip->recflags = cp->c_attr.ca_recflags; + + return (0); +} + +/* + * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item + * isn't incore, then go to the catalog. + */ +static int +do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, + struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp) +{ + int error = 0; + + /* if this id matches the one the fsctl was called with, skip the lookup */ + if (cnid == skip_cp->c_cnid) { + cnattrp->ca_uid = skip_cp->c_uid; + cnattrp->ca_gid = skip_cp->c_gid; + cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; + keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + } else { + struct cinfo c_info; + + /* otherwise, check the cnode hash incase the file/dir is incore */ + error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info); + + if (error == EACCES) { + // File is deleted + return ENOENT; + } else if (!error) { + cnattrp->ca_uid = c_info.uid; + cnattrp->ca_gid = c_info.gid; + cnattrp->ca_mode = c_info.mode; + cnattrp->ca_recflags = c_info.recflags; + keyp->hfsPlus.parentID = c_info.parentcnid; + } else { + int lockflags; + + if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) + throttle_lowpri_io(1); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* lookup this cnid in the catalog */ + error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); + + hfs_systemfile_unlock(hfsmp, lockflags); + + cache->lookups++; + } + } + + return (error); +} + + +/* + * Compute whether we have access to the given directory (nodeID) and all its parents. Cache + * up to CACHE_LEVELS as we progress towards the root. 
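+ *
+ * Returns 1 when the caller has sufficient access to every directory in
+ * the parent chain up to the root, and 0 otherwise with *err holding the
+ * errno.  The verdict for each directory visited is remembered in 'cache'
+ * so that later file ids sharing the same ancestors are answered cheaply.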
+ */ +static int +do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, + struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, + struct vfs_context *my_context, + char *bitmap, + uint32_t map_size, + cnid_t* parents, + uint32_t num_parents) +{ + int myErr = 0; + int myResult; + HFSCatalogNodeID thisNodeID; + unsigned int myPerms; + struct cat_attr cnattr; + int cache_index = -1, scope_index = -1, scope_idx_start = -1; + CatalogKey catkey; + + int i = 0, ids_to_cache = 0; + int parent_ids[CACHE_LEVELS]; + + thisNodeID = nodeID; + while (thisNodeID >= kRootDirID) { + myResult = 0; /* default to "no access" */ + + /* check the cache before resorting to hitting the catalog */ + + /* ASSUMPTION: access info of cached entries is "final"... i.e. no need + * to look any further after hitting cached dir */ + + if (lookup_bucket(cache, &cache_index, thisNodeID)) { + cache->cachehits++; + myErr = cache->haveaccess[cache_index]; + if (scope_index != -1) { + if (myErr == ESRCH) { + myErr = 0; + } + } else { + scope_index = 0; // so we'll just use the cache result + scope_idx_start = ids_to_cache; + } + myResult = (myErr == 0) ? 1 : 0; + goto ExitThisRoutine; + } + + + if (parents) { + int tmp; + tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL); + if (scope_index == -1) + scope_index = tmp; + if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) { + scope_idx_start = ids_to_cache; + } + } + + /* remember which parents we want to cache */ + if (ids_to_cache < CACHE_LEVELS) { + parent_ids[ids_to_cache] = thisNodeID; + ids_to_cache++; + } + // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"... + if (bitmap && map_size) { + bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7)); + } + + + /* do the lookup (checks the cnode hash, then the catalog) */ + myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr); + if (myErr) { + goto ExitThisRoutine; /* no access */ + } + + /* Root always gets access. 
*/ + if (suser(myp_ucred, NULL) == 0) { + thisNodeID = catkey.hfsPlus.parentID; + myResult = 1; + continue; + } + + // if the thing has acl's, do the full permission check + if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) { + struct vnode *vp; + + /* get the vnode for this cnid */ + myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0); + if ( myErr ) { + myResult = 0; + goto ExitThisRoutine; + } + + thisNodeID = VTOC(vp)->c_parentcnid; + + hfs_unlock(VTOC(vp)); + + if (vnode_vtype(vp) == VDIR) { + myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context); + } else { + myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context); + } + + vnode_put(vp); + if (myErr) { + myResult = 0; + goto ExitThisRoutine; + } + } else { + unsigned int flags; + int mode = cnattr.ca_mode & S_IFMT; + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr); + + if (mode == S_IFDIR) { + flags = R_OK | X_OK; + } else { + flags = R_OK; + } + if ( (myPerms & flags) != flags) { + myResult = 0; + myErr = EACCES; + goto ExitThisRoutine; /* no access */ + } + + /* up the hierarchy we go */ + thisNodeID = catkey.hfsPlus.parentID; + } + } + + /* if here, we have access to this node */ + myResult = 1; + + ExitThisRoutine: + if (parents && myErr == 0 && scope_index == -1) { + myErr = ESRCH; + } + + if (myErr) { + myResult = 0; + } + *err = myErr; + + /* cache the parent directory(ies) */ + for (i = 0; i < ids_to_cache; i++) { + if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) { + add_node(cache, -1, parent_ids[i], ESRCH); + } else { + add_node(cache, -1, parent_ids[i], myErr); + } + } + + return (myResult); +} + +static int +do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, + struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context) +{ + boolean_t is64bit; + + /* + * NOTE: on entry, the vnode has an io_ref. In case this vnode + * happens to be in our list of file_ids, we'll note it + * avoid calling hfs_chashget_nowait() on that id as that + * will cause a "locking against myself" panic. + */ + Boolean check_leaf = true; + + struct user64_ext_access_t *user_access_structp; + struct user64_ext_access_t tmp_user_access; + struct access_cache cache; + + int error = 0, prev_parent_check_ok=1; + unsigned int i; + + short flags; + unsigned int num_files = 0; + int map_size = 0; + int num_parents = 0; + int *file_ids=NULL; + short *access=NULL; + char *bitmap=NULL; + cnid_t *parents=NULL; + int leaf_index; + + cnid_t cnid; + cnid_t prevParent_cnid = 0; + unsigned int myPerms; + short myaccess = 0; + struct cat_attr cnattr; + CatalogKey catkey; + struct cnode *skip_cp = VTOC(vp); + kauth_cred_t cred = vfs_context_ucred(context); + proc_t p = vfs_context_proc(context); + + is64bit = proc_is64bit(p); + + /* initialize the local cache and buffers */ + cache.numcached = 0; + cache.cachehits = 0; + cache.lookups = 0; + cache.acache = NULL; + cache.haveaccess = NULL; + + /* struct copyin done during dispatch... 
need to copy file_id array separately */ + if (ap->a_data == NULL) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if (is64bit) { + if (arg_size != sizeof(struct user64_ext_access_t)) { + error = EINVAL; + goto err_exit_bulk_access; + } + + user_access_structp = (struct user64_ext_access_t *)ap->a_data; + + } else if (arg_size == sizeof(struct user32_access_t)) { + struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data; + + // convert an old style bulk-access struct to the new style + tmp_user_access.flags = accessp->flags; + tmp_user_access.num_files = accessp->num_files; + tmp_user_access.map_size = 0; + tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids); + tmp_user_access.bitmap = USER_ADDR_NULL; + tmp_user_access.access = CAST_USER_ADDR_T(accessp->access); + tmp_user_access.num_parents = 0; + user_access_structp = &tmp_user_access; + + } else if (arg_size == sizeof(struct user32_ext_access_t)) { + struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data; + + // up-cast from a 32-bit version of the struct + tmp_user_access.flags = accessp->flags; + tmp_user_access.num_files = accessp->num_files; + tmp_user_access.map_size = accessp->map_size; + tmp_user_access.num_parents = accessp->num_parents; + + tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids); + tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap); + tmp_user_access.access = CAST_USER_ADDR_T(accessp->access); + tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents); + + user_access_structp = &tmp_user_access; + } else { + error = EINVAL; + goto err_exit_bulk_access; + } + + map_size = user_access_structp->map_size; + + num_files = user_access_structp->num_files; + + num_parents= user_access_structp->num_parents; + + if (num_files < 1) { + goto err_exit_bulk_access; + } + if (num_files > 1024) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if (num_parents > 1024) { + error = EINVAL; + goto err_exit_bulk_access; + } + + file_ids = hfs_malloc(sizeof(int) * num_files); + access = hfs_malloc(sizeof(short) * num_files); + if (map_size) { + bitmap = hfs_mallocz(sizeof(char) * map_size); + } + + if (num_parents) { + parents = hfs_malloc(sizeof(cnid_t) * num_parents); + } + + cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES); + cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES); + + if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids, + num_files * sizeof(int)))) { + goto err_exit_bulk_access; + } + + if (num_parents) { + if ((error = copyin(user_access_structp->parents, (caddr_t)parents, + num_parents * sizeof(cnid_t)))) { + goto err_exit_bulk_access; + } + } + + flags = user_access_structp->flags; + if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) { + flags = R_OK; + } + + /* check if we've been passed leaf node ids or parent ids */ + if (flags & PARENT_IDS_FLAG) { + check_leaf = false; + } + + /* Check access to each file_id passed in */ + for (i = 0; i < num_files; i++) { + leaf_index=-1; + cnid = (cnid_t) file_ids[i]; + + /* root always has access */ + if ((!parents) && (!suser(cred, NULL))) { + access[i] = 0; + continue; + } + + if (check_leaf) { + /* do the lookup (checks the cnode hash, then the catalog) */ + error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr); + if (error) { + access[i] = (short) error; + continue; + } + + if (parents) { + // Check if the leaf matches one of the parent scopes + leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL); + if 
(leaf_index >= 0 && parents[leaf_index] == cnid) + prev_parent_check_ok = 0; + else if (leaf_index >= 0) + prev_parent_check_ok = 1; + } + + // if the thing has acl's, do the full permission check + if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) { + struct vnode *cvp; + int myErr = 0; + /* get the vnode for this cnid */ + myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0); + if ( myErr ) { + access[i] = myErr; + continue; + } + + hfs_unlock(VTOC(cvp)); + + if (vnode_vtype(cvp) == VDIR) { + myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context); + } else { + myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context); + } + + vnode_put(cvp); + if (myErr) { + access[i] = myErr; + continue; + } + } else { + /* before calling CheckAccess(), check the target file for read access */ + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, + cnattr.ca_mode, hfsmp->hfs_mp, cred, p); + + /* fail fast if no access */ + if ((myPerms & flags) == 0) { + access[i] = EACCES; + continue; + } + } + } else { + /* we were passed an array of parent ids */ + catkey.hfsPlus.parentID = cnid; + } + + /* if the last guy had the same parent and had access, we're done */ + if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) { + cache.cachehits++; + access[i] = 0; + continue; + } + + myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, + skip_cp, p, cred, context,bitmap, map_size, parents, num_parents); + + if (myaccess || (error == ESRCH && leaf_index != -1)) { + access[i] = 0; // have access.. no errors to report + } else { + access[i] = (error != 0 ? (short) error : EACCES); + } + + prevParent_cnid = catkey.hfsPlus.parentID; + } + + /* copyout the access array */ + if ((error = copyout((caddr_t)access, user_access_structp->access, + num_files * sizeof (short)))) { + goto err_exit_bulk_access; + } + if (map_size && bitmap) { + if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap, + map_size * sizeof (char)))) { + goto err_exit_bulk_access; + } + } + + + err_exit_bulk_access: + + hfs_free(file_ids, sizeof(int) * num_files); + hfs_free(parents, sizeof(cnid_t) * num_parents); + hfs_free(bitmap, sizeof(char) * map_size); + hfs_free(access, sizeof(short) * num_files); + hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES); + hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES); + + return (error); +} + + +/* end "bulk-access" support */ + + +/* + * Control filesystem operating characteristics. 
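+ *
+ * This is the dispatch point for the HFSIOC_* selectors handled by the
+ * switch statement below (HFSIOC_GETPATH, HFSIOC_SET_MAX_DEFRAG_SIZE,
+ * HFSIOC_FORCE_ENABLE_DEFRAG, HFSIOC_TRANSFER_DOCUMENT_ID, and others).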
+ */ +int +hfs_vnop_ioctl( struct vnop_ioctl_args /* { + vnode_t a_vp; + long a_command; + caddr_t a_data; + int a_fflag; + vfs_context_t a_context; + } */ *ap) +{ + struct vnode * vp = ap->a_vp; + struct hfsmount *hfsmp = VTOHFS(vp); + vfs_context_t context = ap->a_context; + kauth_cred_t cred = vfs_context_ucred(context); + proc_t p = vfs_context_proc(context); + struct vfsstatfs *vfsp; + boolean_t is64bit; + off_t jnl_start, jnl_size; + struct hfs_journal_info *jip; +#if HFS_COMPRESSION + int compressed = 0; + off_t uncompressed_size = -1; + int decmpfs_error = 0; + + if (ap->a_command == F_RDADVISE) { + /* we need to inspect the decmpfs state of the file as early as possible */ + compressed = hfs_file_is_compressed(VTOC(vp), 0); + if (compressed) { + if (VNODE_IS_RSRC(vp)) { + /* if this is the resource fork, treat it as if it were empty */ + uncompressed_size = 0; + } else { + decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); + if (decmpfs_error != 0) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } + } + } + } +#endif /* HFS_COMPRESSION */ + + is64bit = proc_is64bit(p); + +#if CONFIG_PROTECT +#if HFS_CONFIG_KEY_ROLL + // The HFSIOC_KEY_ROLL fsctl does its own access checks + if (ap->a_command != HFSIOC_KEY_ROLL) +#endif + { + int error = 0; + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return error; + } + } +#endif /* CONFIG_PROTECT */ + + switch (ap->a_command) { + + case HFSIOC_GETPATH: + { + struct vnode *file_vp; + cnid_t cnid; + int outlen; + char *bufptr; + int error; + int flags = 0; + + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); + } + /* Target vnode must be file system's root. */ + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + bufptr = (char *)ap->a_data; + cnid = strtoul(bufptr, NULL, 10); + if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) { + flags |= BUILDPATH_VOLUME_RELATIVE; + } + + /* We need to call hfs_vfs_vget to leverage the code that will + * fix the origin list for us if needed, as opposed to calling + * hfs_vget, since we will need the parent for build_path call. 
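+ *
+ * Caller-side, the fsctl is symmetric in its buffer: the CNID goes in as a
+ * decimal string and the resolved path comes back in the same buffer. A
+ * minimal, hypothetical user-space sketch (the variable names and the use
+ * of the fsctl() wrapper from <sys/fsctl.h> are assumptions, illustrative
+ * only):
+ *
+ *     char buf[MAXPATHLEN];                    // pathname_t-sized buffer
+ *     snprintf(buf, sizeof(buf), "%u", cnid);  // CNID in ...
+ *     if (fsctl(mount_point, HFSIOC_GETPATH, buf, 0) == 0)
+ *         printf("%s\n", buf);                 // ... full path out
+ *
+ * The owner/volume-root checks earlier in this case still apply.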
+ */ + + if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { + return (error); + } + + error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context); + vnode_put(file_vp); + + return (error); + } + + case HFSIOC_SET_MAX_DEFRAG_SIZE: + { + int error = 0; /* Assume success */ + u_int32_t maxsize = 0; + + if (vnode_vfsisrdonly(vp)) { + return (EROFS); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (!kauth_cred_issuser(cred)) { + return (EACCES); /* must be root */ + } + + maxsize = *(u_int32_t *)ap->a_data; + + hfs_lock_mount(hfsmp); + if (maxsize > HFS_MAX_DEFRAG_SIZE) { + error = EINVAL; + } + else { + hfsmp->hfs_defrag_max = maxsize; + } + hfs_unlock_mount(hfsmp); + + return (error); + } + + case HFSIOC_FORCE_ENABLE_DEFRAG: + { + int error = 0; /* Assume success */ + u_int32_t do_enable = 0; + + if (vnode_vfsisrdonly(vp)) { + return (EROFS); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (!kauth_cred_issuser(cred)) { + return (EACCES); /* must be root */ + } + + do_enable = *(u_int32_t *)ap->a_data; + + hfs_lock_mount(hfsmp); + if (do_enable != 0) { + hfsmp->hfs_defrag_nowait = 1; + } + else { + error = EINVAL; + } + + hfs_unlock_mount(hfsmp); + + return (error); + } + + + case HFSIOC_TRANSFER_DOCUMENT_ID: + { + struct cnode *cp = NULL; + int error; + u_int32_t to_fd = *(u_int32_t *)ap->a_data; + struct fileproc *to_fp; + struct vnode *to_vp; + struct cnode *to_cp; + + cp = VTOC(vp); + + if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) { + //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error); + return error; + } + if ( (error = vnode_getwithref(to_vp)) ) { + file_drop(to_fd); + return error; + } + + if (VTOHFS(to_vp) != hfsmp) { + error = EXDEV; + goto transfer_cleanup; + } + + int need_unlock = 1; + to_cp = VTOC(to_vp); + error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); + if (error != 0) { + //printf("could not lock the pair of cnodes (error %d)\n", error); + goto transfer_cleanup; + } + + if (!(cp->c_bsdflags & UF_TRACKED)) { + error = EINVAL; + } else if (to_cp->c_bsdflags & UF_TRACKED) { + // + // if the destination is already tracked, return an error + // as otherwise it's a silent deletion of the target's + // document-id + // + error = EEXIST; + } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + // + // we can use the FndrExtendedFileInfo because the doc-id is the first + // thing in both it and the ExtendedDirInfo struct which is fixed in + // format and can not change layout + // + struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); + struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16); + + if (f_extinfo->document_id == 0) { + uint32_t new_id; + + hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id + + if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) { + // + // re-lock the pair now that we have the document-id + // + hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); + f_extinfo->document_id = new_id; + } else { + goto transfer_cleanup; + } + } + + to_extinfo->document_id = f_extinfo->document_id; + f_extinfo->document_id = 0; + //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid); + + // make sure the destination is also UF_TRACKED + to_cp->c_bsdflags |= UF_TRACKED; + cp->c_bsdflags &= ~UF_TRACKED; + + // mark the cnodes dirty + cp->c_flag |= C_MODIFIED; + 
to_cp->c_flag |= C_MODIFIED; + + int lockflags; + if ((error = hfs_start_transaction(hfsmp)) == 0) { + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + } + + add_fsevent(FSE_DOCID_CHANGED, context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode # + FSE_ARG_INT32, to_extinfo->document_id, + FSE_ARG_DONE); + + hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents + need_unlock = 0; + + if (need_fsevent(FSE_STAT_CHANGED, vp)) { + add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + if (need_fsevent(FSE_STAT_CHANGED, to_vp)) { + add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE); + } + } + + if (need_unlock) { + hfs_unlockpair(cp, to_cp); + } + + transfer_cleanup: + vnode_put(to_vp); + file_drop(to_fd); + + return error; + } + + + + case HFSIOC_PREV_LINK: + case HFSIOC_NEXT_LINK: + { + cnid_t linkfileid; + cnid_t nextlinkid; + cnid_t prevlinkid; + int error; + + /* Caller must be owner of file system. */ + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); + } + /* Target vnode must be file system's root. */ + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + linkfileid = *(cnid_t *)ap->a_data; + if (linkfileid < kHFSFirstUserCatalogNodeID) { + return (EINVAL); + } + if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) { + return (error); + } + if (ap->a_command == HFSIOC_NEXT_LINK) { + *(cnid_t *)ap->a_data = nextlinkid; + } else { + *(cnid_t *)ap->a_data = prevlinkid; + } + return (0); + } + + case HFSIOC_RESIZE_PROGRESS: { + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + /* file system must not be mounted read-only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + + return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); + } + + case HFSIOC_RESIZE_VOLUME: { + u_int64_t newsize; + u_int64_t cursize; + int ret; + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + + /* filesystem must not be mounted read only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + newsize = *(u_int64_t *)ap->a_data; + cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + + if (newsize == cursize) { + return (0); + } + IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize); + if (newsize > cursize) { + ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); + } else { + ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); + } + IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize); + return (ret); + } + case HFSIOC_CHANGE_NEXT_ALLOCATION: { + int error = 0; /* Assume success */ + u_int32_t location; + + if (vnode_vfsisrdonly(vp)) { + return (EROFS); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* 
must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + hfs_lock_mount(hfsmp); + location = *(u_int32_t *)ap->a_data; + if ((location >= hfsmp->allocLimit) && + (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) { + error = EINVAL; + goto fail_change_next_allocation; + } + /* Return previous value. */ + *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; + if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) { + /* On magic value for location, set nextAllocation to next block + * after metadata zone and set flag in mount structure to indicate + * that nextAllocation should not be updated again. + */ + if (hfsmp->hfs_metazone_end != 0) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } + hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; + } else { + hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location); + } + MarkVCBDirty(hfsmp); +fail_change_next_allocation: + hfs_unlock_mount(hfsmp); + return (error); + } + +#if HFS_SPARSE_DEV + case HFSIOC_SETBACKINGSTOREINFO: { + struct vnode * di_vp; + struct hfs_backingstoreinfo *bsdata; + int error = 0; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + return (EALREADY); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + bsdata = (struct hfs_backingstoreinfo *)ap->a_data; + if (bsdata == NULL) { + return (EINVAL); + } + if ((error = file_vnode(bsdata->backingfd, &di_vp))) { + return (error); + } + if ((error = vnode_getwithref(di_vp))) { + file_drop(bsdata->backingfd); + return(error); + } + + if (vnode_mount(vp) == vnode_mount(di_vp)) { + (void)vnode_put(di_vp); + file_drop(bsdata->backingfd); + return (EINVAL); + } + + // Dropped in unmount + vnode_ref(di_vp); + + hfs_lock_mount(hfsmp); + hfsmp->hfs_backingvp = di_vp; + hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; + hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4; + hfs_unlock_mount(hfsmp); + + /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ + + /* + * If the sparse image is on a sparse image file (as opposed to a sparse + * bundle), then we may need to limit the free space to the maximum size + * of a file on that volume. So we query (using pathconf), and if we get + * a meaningful result, we cache the number of blocks for later use in + * hfs_freeblks(). + */ + hfsmp->hfs_backingfs_maxblocks = 0; + if (vnode_vtype(di_vp) == VREG) { + int terr; + int hostbits; + terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); + if (terr == 0 && hostbits != 0 && hostbits < 64) { + u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; + + hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; + } + } + + /* The free extent cache is managed differently for sparse devices. + * There is a window between which the volume is mounted and the + * device is marked as sparse, so the free extent cache for this + * volume is currently initialized as normal volume (sorted by block + * count). Reset the cache so that it will be rebuilt again + * for sparse device (sorted by start block). 
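+ *
+ * Worked example for the sizing done just above (illustrative figures,
+ * assuming a 4 KiB allocation block): a host volume reporting
+ * _PC_FILESIZEBITS of 32 caps hfs_backingfs_maxblocks at
+ * 2^32 / 4096 = 1,048,576 blocks, and a bandsize of 8 MiB yields
+ * hfs_sparsebandblks = (8 MiB / 4 KiB) * 4 = 8192 blocks.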
+ */ + ResetVCBFreeExtCache(hfsmp); + + (void)vnode_put(di_vp); + file_drop(bsdata->backingfd); + return (0); + } + + case HFSIOC_CLRBACKINGSTOREINFO: { + struct vnode * tmpvp; + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + hfsmp->hfs_backingvp) { + + hfs_lock_mount(hfsmp); + hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; + tmpvp = hfsmp->hfs_backingvp; + hfsmp->hfs_backingvp = NULLVP; + hfsmp->hfs_sparsebandblks = 0; + hfs_unlock_mount(hfsmp); + + vnode_rele(tmpvp); + } + return (0); + } +#endif /* HFS_SPARSE_DEV */ + + /* Change the next CNID stored in the VH */ + case HFSIOC_CHANGE_NEXTCNID: { + int error = 0; /* Assume success */ + u_int32_t fileid; + int wraparound = 0; + int lockflags = 0; + + if (vnode_vfsisrdonly(vp)) { + return (EROFS); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + + fileid = *(u_int32_t *)ap->a_data; + + /* Must have catalog lock excl. to advance the CNID pointer */ + lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK); + + hfs_lock_mount(hfsmp); + + /* If it is less than the current next CNID, force the wraparound bit to be set */ + if (fileid < hfsmp->vcbNxtCNID) { + wraparound=1; + } + + /* Return previous value. */ + *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID; + + hfsmp->vcbNxtCNID = fileid; + + if (wraparound) { + hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; + } + + MarkVCBDirty(hfsmp); + hfs_unlock_mount(hfsmp); + hfs_systemfile_unlock (hfsmp, lockflags); + + return (error); + } + + case F_FREEZE_FS: { + struct mount *mp; + + mp = vnode_mount(vp); + hfsmp = VFSTOHFS(mp); + + if (!(hfsmp->jnl)) + return (ENOTSUP); + + vfsp = vfs_statfs(mp); + + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) + return (EACCES); + + return hfs_freeze(hfsmp); + } + + case F_THAW_FS: { + vfsp = vfs_statfs(vnode_mount(vp)); + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) + return (EACCES); + + return hfs_thaw(hfsmp, current_proc()); + } + + case HFSIOC_EXT_BULKACCESS32: + case HFSIOC_EXT_BULKACCESS64: { + int size; +#if CONFIG_HFS_STD + if (hfsmp->hfs_flags & HFS_STANDARD) { + return EINVAL; + } +#endif + + if (is64bit) { + size = sizeof(struct user64_ext_access_t); + } else { + size = sizeof(struct user32_ext_access_t); + } + + return do_bulk_access_check(hfsmp, vp, ap, size, context); + } + + case HFSIOC_SET_XATTREXTENTS_STATE: { + int state; + + if (ap->a_data == NULL) { + return (EINVAL); + } + + state = *(int *)ap->a_data; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + + /* Super-user can enable or disable extent-based extended + * attribute support on a volume + * Note: Starting Mac OS X 10.7, extent-based extended attributes + * are enabled by default, so any change will be transient only + * till the volume is remounted. + */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return (EPERM); + } + if (state == 0 || state == 1) + return hfs_set_volxattr(hfsmp, HFSIOC_SET_XATTREXTENTS_STATE, state); + else + return (EINVAL); + } + + case F_SETSTATICCONTENT: { + int error; + int enable_static = 0; + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. 
+ * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data) { + /* + * Note that even though ap->a_data is of type caddr_t, + * the fcntl layer at the syscall handler will pass in NULL + * or 1 depending on what the argument supplied to the fcntl + * was. So it is in fact correct to check the ap->a_data + * argument for zero or non-zero value when deciding whether or not + * to enable the static bit in the cnode. + */ + enable_static = 1; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + if (enable_static) { + cp->c_flag |= C_SSD_STATIC; + } + else { + cp->c_flag &= ~C_SSD_STATIC; + } + hfs_unlock (cp); + } + return error; + } + + case F_SET_GREEDY_MODE: { + int error; + int enable_greedy_mode = 0; + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data) { + /* + * Note that even though ap->a_data is of type caddr_t, + * the fcntl layer at the syscall handler will pass in NULL + * or 1 depending on what the argument supplied to the fcntl + * was. So it is in fact correct to check the ap->a_data + * argument for zero or non-zero value when deciding whether or not + * to enable the greedy mode bit in the cnode. + */ + enable_greedy_mode = 1; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + if (enable_greedy_mode) { + cp->c_flag |= C_SSD_GREEDY_MODE; + } + else { + cp->c_flag &= ~C_SSD_GREEDY_MODE; + } + hfs_unlock (cp); + } + return error; + } + + case F_SETIOTYPE: { + int error; + uint32_t iotypeflag = 0; + + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data == NULL) { + return EINVAL; + } + + /* + * Note that even though ap->a_data is of type caddr_t, we + * can only use 32 bits of flag values. + */ + iotypeflag = (uint32_t) ap->a_data; + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + break; + default: + return EINVAL; + } + + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + cp->c_flag |= C_IO_ISOCHRONOUS; + break; + default: + break; + } + hfs_unlock (cp); + } + return error; + } + + case F_MAKECOMPRESSED: { + int error = 0; + uint32_t gen_counter; + struct cnode *cp = NULL; + int reset_decmp = 0; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + + /* + * acquire & lock the cnode. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data) { + /* + * Cast the pointer into a uint32_t so we can extract the + * supplied generation counter. + */ + gen_counter = *((uint32_t*)ap->a_data); + } + else { + return EINVAL; + } + +#if HFS_COMPRESSION + cp = VTOC(vp); + /* Grab truncate lock first; we may truncate the file */ + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return error; + } + + /* Are there any other usecounts/FDs? 
*/ + if (vnode_isinuse(vp, 1)) { + hfs_unlock(cp); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return EBUSY; + } + + /* now we have the cnode locked down; Validate arguments */ + if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) { + /* EINVAL if you are trying to manipulate an IMMUTABLE file */ + hfs_unlock(cp); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + return EINVAL; + } + + if ((hfs_get_gencount (cp)) == gen_counter) { + /* + * OK, the gen_counter matched. Go for it: + * Toggle state bits, truncate file, and suppress mtime update + */ + reset_decmp = 1; + cp->c_bsdflags |= UF_COMPRESSED; + + error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, + ap->a_context); + } + else { + error = ESTALE; + } + + /* Unlock cnode before executing decmpfs ; they may need to get an EA */ + hfs_unlock(cp); + + /* + * Reset the decmp state while still holding the truncate lock. We need to + * serialize here against a listxattr on this node which may occur at any + * time. + * + * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed, + * that will still potentially require getting the com.apple.decmpfs EA. If the + * EA is required, then we can't hold the cnode lock, because the getxattr call is + * generic(through VFS), and can't pass along any info telling it that we're already + * holding it (the lock). If we don't serialize, then we risk listxattr stopping + * and trying to fill in the hfs_file_is_compressed info during the callback + * operation, which will result in deadlock against the b-tree node. + * + * So, to serialize against listxattr (which will grab buf_t meta references on + * the b-tree blocks), we hold the truncate lock as we're manipulating the + * decmpfs payload. + */ + if ((reset_decmp) && (error == 0)) { + decmpfs_cnode *dp = VTOCMP (vp); + if (dp != NULL) { + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } + + /* Initialize the decmpfs node as needed */ + (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */ + } + + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + +#endif + return error; + } + + case F_SETBACKINGSTORE: { + + int error = 0; + + /* + * See comment in F_SETSTATICCONTENT re: using + * a null check for a_data + */ + if (ap->a_data) { + error = hfs_set_backingstore (vp, 1); + } + else { + error = hfs_set_backingstore (vp, 0); + } + + return error; + } + + case F_GETPATH_MTMINFO: { + int error = 0; + + int *data = (int*) ap->a_data; + + /* Ask if this is a backingstore vnode */ + error = hfs_is_backingstore (vp, data); + + return error; + } + + case F_FULLFSYNC: { + int error; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p); + hfs_unlock(VTOC(vp)); + } + + return error; + } + + case F_BARRIERFSYNC: { + int error; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p); + hfs_unlock(VTOC(vp)); + } + + return error; + } + + case F_CHKCLEAN: { + register struct cnode *cp; + int error; + + if (!vnode_isreg(vp)) + return EINVAL; + + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + cp = VTOC(vp); + /* + * used by regression test to determine if + * all the dirty pages (via write) have been cleaned + * after a call to 'fsysnc'. 
+ */ + error = is_file_clean(vp, VTOF(vp)->ff_size); + hfs_unlock(cp); + } + return (error); + } + + case F_RDADVISE: { + register struct radvisory *ra; + struct filefork *fp; + int error; + + if (!vnode_isreg(vp)) + return EINVAL; + + ra = (struct radvisory *)(ap->a_data); + fp = VTOF(vp); + + /* Protect against a size change. */ + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + +#if HFS_COMPRESSION + if (compressed) { + if (uncompressed_size == -1) { + /* fetching the uncompressed size failed above, so return the error */ + error = decmpfs_error; + } else if (ra->ra_offset >= uncompressed_size) { + error = EFBIG; + } else { + error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count); + } + } else +#endif /* HFS_COMPRESSION */ + if (ra->ra_offset >= fp->ff_size) { + error = EFBIG; + } else { + error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); + } + + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); + return (error); + } + + case HFSIOC_GET_VOL_CREATE_TIME_32: { + *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); + return 0; + } + + case HFSIOC_GET_VOL_CREATE_TIME_64: { + *(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); + return 0; + } + + case SPOTLIGHT_IOC_GET_MOUNT_TIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; + break; + + case SPOTLIGHT_IOC_GET_LAST_MTIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; + break; + + case HFSIOC_GET_VERY_LOW_DISK: + *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit; + break; + + case HFSIOC_SET_VERY_LOW_DISK: + if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; + break; + + case HFSIOC_GET_LOW_DISK: + *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit; + break; + + case HFSIOC_SET_LOW_DISK: + if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel + || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { + + return EINVAL; + } + + hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; + break; + + /* The following two fsctls were ported from apfs. 
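+ *
+ * Together with the HFSIOC_*_VERY_LOW_DISK, *_LOW_DISK and *_DESIRED_DISK
+ * selectors around them, the setters reject any value that would break the
+ * ordering of the free-space thresholds:
+ *
+ *     dangerlimit < warninglimit < nearwarninglimit < desiredlevel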
*/ + case APFSIOC_GET_NEAR_LOW_DISK: + *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit; + break; + + case APFSIOC_SET_NEAR_LOW_DISK: + if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel + || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data; + break; + + case HFSIOC_GET_DESIRED_DISK: + *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel; + break; + + case HFSIOC_SET_DESIRED_DISK: + if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; + break; + + case HFSIOC_VOLUME_STATUS: + *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; + break; + + case HFS_SET_BOOT_INFO: + if (!vnode_isvroot(vp)) + return(EINVAL); + if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) + return(EACCES); /* must be superuser or owner of filesystem */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + hfs_lock_mount (hfsmp); + bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); + /* Null out the cached UUID, to be safe */ + uuid_clear (hfsmp->hfs_full_uuid); + hfs_unlock_mount (hfsmp); + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + break; + + case HFS_GET_BOOT_INFO: + if (!vnode_isvroot(vp)) + return(EINVAL); + hfs_lock_mount (hfsmp); + bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); + hfs_unlock_mount(hfsmp); + break; + + /* case HFS_MARK_BOOT_CORRUPT: _IO are the same */ + case HFSIOC_MARK_BOOT_CORRUPT: + /* Mark the boot volume corrupt by setting + * kHFSVolumeInconsistentBit in the volume header. This will + * force fsck_hfs on next mount. 
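+ *
+ * A hypothetical caller-side sketch (the fsctl() call and the "/" path are
+ * assumptions; the request carries no payload and, per the checks below,
+ * must be issued by root against the boot volume's root):
+ *
+ *     int rc = fsctl("/", HFSIOC_MARK_BOOT_CORRUPT, NULL, 0);  // 0 on success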
+ */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* Allowed only on the root vnode of the boot volume */ + if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || + !vnode_isvroot(vp)) { + return EINVAL; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); + hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED); + break; + + case HFSIOC_GET_JOURNAL_INFO: + jip = (struct hfs_journal_info*)ap->a_data; + + if (vp == NULLVP) + return EINVAL; + + if (hfsmp->jnl == NULL) { + jnl_start = 0; + jnl_size = 0; + } else { + jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset; + jnl_size = hfsmp->jnl_size; + } + + jip->jstart = jnl_start; + jip->jsize = jnl_size; + break; + + case HFSIOC_SET_ALWAYS_ZEROFILL: { + struct cnode *cp = VTOC(vp); + + if (*(int *)ap->a_data) { + cp->c_flag |= C_ALWAYS_ZEROFILL; + } else { + cp->c_flag &= ~C_ALWAYS_ZEROFILL; + } + break; + } + + /* case HFS_DISABLE_METAZONE: _IO are the same */ + case HFSIOC_DISABLE_METAZONE: { + /* Only root can disable metadata zone */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + + /* Disable metadata zone now */ + (void) hfs_metadatazone_init(hfsmp, true); + printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); + break; + } + + + case HFSIOC_FSINFO_METADATA_BLOCKS: { + int error; + struct hfsinfo_metadata *hinfo; + + hinfo = (struct hfsinfo_metadata *)ap->a_data; + + /* Get information about number of metadata blocks */ + error = hfs_getinfo_metadata_blocks(hfsmp, hinfo); + if (error) { + return error; + } + + break; + } + + case HFSIOC_GET_FSINFO: { + hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data; + + /* Only root is allowed to get fsinfo */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* + * Make sure that the caller's version number matches with + * the kernel's version number. This will make sure that + * if the structures being read/written into are changed + * by the kernel, the caller will not read incorrect data. + * + * The first three fields --- request_type, version and + * flags are same for all the hfs_fsinfo structures, so + * we can access the version number by assuming any + * structure for now. + */ + if (fsinfo->header.version != HFS_FSINFO_VERSION) { + return ENOTSUP; + } + + /* Make sure that the current file system is not marked inconsistent */ + if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) { + return EIO; + } + + return hfs_get_fsinfo(hfsmp, ap->a_data); + } + + case HFSIOC_CS_FREESPACE_TRIM: { + int error = 0; + int lockflags = 0; + + /* Only root allowed */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* + * This core functionality is similar to hfs_scan_blocks(). + * The main difference is that hfs_scan_blocks() is called + * as part of mount where we are assured that the journal is + * empty to start with. This fcntl() can be called on a + * mounted volume, therefore it has to flush the content of + * the journal as well as ensure the state of summary table. + * + * This fcntl scans over the entire allocation bitmap, + * creates list of all the free blocks, and issues TRIM + * down to the underlying device. This can take long time + * as it can generate up to 512MB of read I/O. 
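+ *
+ * In outline (an assumed summary; the detailed rationale follows in the
+ * comments below):
+ *
+ *   1. hfs_init_summary()                  if the summary table is absent
+ *   2. hfs_journal_lock()                  block new transactions
+ *   3. hfs_flush(HFS_FLUSH_JOURNAL_META)   drain the journal and its trim list
+ *   4. hfs_systemfile_lock(SFL_BITMAP)     freeze the allocation bitmap
+ *   5. hfs_journal_unlock()                safe; bitmap lock now blocks frees
+ *   6. buf_invalidateblks(allocation vp)   avoid buf_t collisions during scan
+ *   7. ScanUnmapBlocks()                   walk the bitmap, issue DKIOCUNMAPs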
+ */ + + if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { + error = hfs_init_summary(hfsmp); + if (error) { + printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN); + return error; + } + } + + /* + * The journal maintains list of recently deallocated blocks to + * issue DKIOCUNMAPs when the corresponding journal transaction is + * flushed to the disk. To avoid any race conditions, we only + * want one active trim list and only one thread issuing DKIOCUNMAPs. + * Therefore we make sure that the journal trim list is sync'ed, + * empty, and not modifiable for the duration of our scan. + * + * Take the journal lock before flushing the journal to the disk. + * We will keep on holding the journal lock till we don't get the + * bitmap lock to make sure that no new journal transactions can + * start. This will make sure that the journal trim list is not + * modified after the journal flush and before getting bitmap lock. + * We can release the journal lock after we acquire the bitmap + * lock as it will prevent any further block deallocations. + */ + hfs_journal_lock(hfsmp); + + /* Flush the journal and wait for all I/Os to finish up */ + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + if (error) { + hfs_journal_unlock(hfsmp); + return error; + } + + /* Take bitmap lock to ensure it is not being modified */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* Release the journal lock */ + hfs_journal_unlock(hfsmp); + + /* + * ScanUnmapBlocks reads the bitmap in large block size + * (up to 1MB) unlike the runtime which reads the bitmap + * in the 4K block size. This can cause buf_t collisions + * and potential data corruption. To avoid this, we + * invalidate all the existing buffers associated with + * the bitmap vnode before scanning it. + * + * Note: ScanUnmapBlock() cleans up all the buffers + * after itself, so there won't be any large buffers left + * for us to clean up after it returns. 
+ */ + error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + if (error) { + hfs_systemfile_unlock(hfsmp, lockflags); + return error; + } + + /* Traverse bitmap and issue DKIOCUNMAPs */ + error = ScanUnmapBlocks(hfsmp); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + return error; + } + + break; + } + + case HFSIOC_SET_HOTFILE_STATE: { + int error; + struct cnode *cp = VTOC(vp); + uint32_t hf_state = *((uint32_t*)ap->a_data); + uint32_t num_unpinned = 0; + + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) { + return error; + } + + // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name); + if (hf_state == HFS_MARK_FASTDEVCANDIDATE) { + vnode_setfastdevicecandidate(vp); + + cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask; + cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask; + cp->c_flag |= C_MODIFIED; + } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) { + vnode_clearfastdevicecandidate(vp); + hfs_removehotfile(vp); + + if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) { + hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned); + } + + if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) { + cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask; + } + cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask); + cp->c_flag |= C_MODIFIED; + + } else { + error = EINVAL; + } + + if (num_unpinned != 0) { + lck_mtx_lock(&hfsmp->hfc_mutex); + hfsmp->hfs_hotfile_freeblks += num_unpinned; + lck_mtx_unlock(&hfsmp->hfc_mutex); + } + + hfs_unlock(cp); + return error; + } + + case HFSIOC_REPIN_HOTFILE_STATE: { + int error=0; + uint32_t repin_what = *((uint32_t*)ap->a_data); + + /* Only root allowed */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) { + // this system is neither regular Fusion or Cooperative Fusion + // so this fsctl makes no sense. + return EINVAL; + } + + // + // After a converting a CoreStorage volume to be encrypted, the + // extents could have moved around underneath us. This call + // allows corestoraged to re-pin everything that should be + // pinned (it would happen on the next reboot too but that could + // be a long time away). + // + if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) { + hfs_pin_fs_metadata(hfsmp); + } + if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) { + hfs_repin_hotfiles(hfsmp); + } + if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) { + //XXX Swapfiles (marked SWAP_PINNED) may have moved too. + //XXX Do we care? They have a more transient/dynamic nature/lifetime. + } + + return error; + } + +#if HFS_CONFIG_KEY_ROLL + + case HFSIOC_KEY_ROLL: { + if (!kauth_cred_issuser(kauth_cred_get())) + return EACCES; + + hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data; + + return hfs_key_roll_op(ap->a_context, ap->a_vp, args); + } + + case HFSIOC_GET_KEY_AUTO_ROLL: { + if (!kauth_cred_issuser(kauth_cred_get())) + return EACCES; + + hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data; + if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1) + return ENOTSUP; + args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION) + ? 
HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0); + args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version; + args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version; + break; + } + + case HFSIOC_SET_KEY_AUTO_ROLL: { + if (!kauth_cred_issuser(kauth_cred_get())) + return EACCES; + + hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data; + if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1) + return ENOTSUP; + return cp_set_auto_roll(hfsmp, args); + } + +#endif // HFS_CONFIG_KEY_ROLL + +#if CONFIG_PROTECT + case F_TRANSCODEKEY: + /* + * This API is only supported when called via kernel so + * a_fflag must be set to 1 (it's not possible to get here + * with it set to 1 via fsctl). + */ + if (ap->a_fflag != 1) + return ENOTTY; + return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data); + + case F_GETPROTECTIONLEVEL: + return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data); + + case F_GETDEFAULTPROTLEVEL: + return cp_get_default_level(vp, (uint32_t *)ap->a_data); +#endif // CONFIG_PROTECT + + case FIOPINSWAP: + return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN, + NULL); + + case FSIOC_CAS_BSDFLAGS: { + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + +#if 0 + struct fsioc_cas_bsdflags *cas = (void *)ap->a_data; + struct cnode *cp = VTOC(vp); + u_int32_t document_id = 0; + int decmpfs_reset_state = 0; + int error; + + /* Don't allow modification of the journal. */ + if (hfs_is_journal_file(hfsmp, cp)) { + return (EPERM); + } + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (error); + } + + cas->actual_flags = cp->c_bsdflags; + if (cas->actual_flags != cas->expected_flags) { + hfs_unlock(cp); + return (0); + } + + // + // Check if we'll need a document_id. If so, we need to drop the lock + // (to avoid any possible deadlock with the root vnode which has to get + // locked to get the document id), generate the document_id, re-acquire + // the lock, and perform the CAS check again. We do it in this sequence + // in order to avoid throwing away document_ids in the case where the + // CAS check fails. Note that it can still happen, but by performing + // the check first, hopefully we can reduce the ocurrence. + // + if ((cas->new_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16); + // + // If the document_id is not set, get a new one. It will be set + // on the file down below once we hold the cnode lock. + // + if (fip->document_id == 0) { + // + // Drat, we have to generate one. Unlock the cnode, do the + // deed, re-lock the cnode, and then to the CAS check again + // to see if we lost the race. 
+ // + hfs_unlock(cp); + if (hfs_generate_document_id(hfsmp, &document_id) != 0) { + document_id = 0; + } + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (error); + } + cas->actual_flags = cp->c_bsdflags; + if (cas->actual_flags != cas->expected_flags) { + hfs_unlock(cp); + return (0); + } + } + } + + bool setting_compression = false; + + if (!(cas->actual_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED)) + setting_compression = true; + + if (setting_compression) { + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (VTOF(vp)->ff_size) { + // hfs_truncate will deal with the cnode lock + error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context); + } + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + + if (!error) + error = hfs_set_bsd_flags(hfsmp, cp, cas->new_flags, + document_id, ap->a_context, + &decmpfs_reset_state); + if (error == 0) { + error = hfs_update(vp, 0); + } + hfs_unlock(cp); + if (error) { + return (error); + } + +#if HFS_COMPRESSION + if (decmpfs_reset_state) { + /* + * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode + * but don't do it while holding the hfs cnode lock + */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes + * on this file if it's locked + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + break; +#endif + return ENOTSUP; + } + + default: + return (ENOTTY); + } + + return 0; +} + +/* + * select + */ +int +hfs_vnop_select(__unused struct vnop_select_args *ap) +/* + struct vnop_select_args { + vnode_t a_vp; + int a_which; + int a_fflags; + void *a_wql; + vfs_context_t a_context; + }; +*/ +{ + /* + * We should really check to see if I/O is possible. + */ + return (1); +} + +/* + * Converts a logical block number to a physical block, and optionally returns + * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize. + * The physical block number is based on the device block size, currently its 512. + * The block run is returned in logical blocks, and is the REMAINING amount of blocks + */ +int +hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp) +{ + struct filefork *fp = VTOF(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + int retval = E_NONE; + u_int32_t logBlockSize; + size_t bytesContAvail = 0; + off_t blockposition; + int lockExtBtree; + int lockflags = 0; + + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. 
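+ *
+ * (For the read-ahead count computed further down, with illustrative
+ * figures: if MapFileBlockC reports 64 KiB of contiguous bytes and the
+ * logical block size is 4 KiB, *runp is 65536/4096 - 1 = 15, i.e. fifteen
+ * more logical blocks can be read after the one being mapped.)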
+ */ + if (vpp != NULL) + *vpp = hfsmp->hfs_devvp; + if (bnp == NULL) + return (0); + + logBlockSize = GetLogicalBlockSize(vp); + blockposition = (off_t)bn * logBlockSize; + + lockExtBtree = overflow_extents(fp); + + if (lockExtBtree) + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + + retval = MacToVFSError( + MapFileBlockC (HFSTOVCB(hfsmp), + (FCB*)fp, + MAXPHYSIO, + blockposition, + bnp, + &bytesContAvail)); + + if (lockExtBtree) + hfs_systemfile_unlock(hfsmp, lockflags); + + if (retval == E_NONE) { + /* Figure out how many read ahead blocks there are */ + if (runp != NULL) { + if (can_cluster(logBlockSize)) { + /* Make sure this result never goes negative: */ + *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1; + } else { + *runp = 0; + } + } + } + return (retval); +} + +/* + * Convert logical block number to file offset. + */ +int +hfs_vnop_blktooff(struct vnop_blktooff_args *ap) +/* + struct vnop_blktooff_args { + vnode_t a_vp; + daddr64_t a_lblkno; + off_t *a_offset; + }; +*/ +{ + if (ap->a_vp == NULL) + return (EINVAL); + *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp); + + return(0); +} + +/* + * Convert file offset to logical block number. + */ +int +hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) +/* + struct vnop_offtoblk_args { + vnode_t a_vp; + off_t a_offset; + daddr64_t *a_lblkno; + }; +*/ +{ + if (ap->a_vp == NULL) + return (EINVAL); + *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp)); + + return(0); +} + +/* + * Map file offset to physical block number. + * + * If this function is called for write operation, and if the file + * had virtual blocks allocated (delayed allocation), real blocks + * are allocated by calling ExtendFileC(). + * + * If this function is called for read operation, and if the file + * had virtual blocks allocated (delayed allocation), no change + * to the size of file is done, and if required, rangelist is + * searched for mapping. + * + * System file cnodes are expected to be locked (shared or exclusive). + * + * -- INVALID RANGES -- + * + * Invalid ranges are used to keep track of where we have extended a + * file, but have not yet written that data to disk. In the past we + * would clear up the invalid ranges as we wrote to those areas, but + * before data was actually flushed to disk. The problem with that + * approach is that the data can be left in the cache and is therefore + * still not valid on disk. So now we clear up the ranges here, when + * the flags field has VNODE_WRITE set, indicating a write is about to + * occur. This isn't ideal (ideally we want to clear them up when + * know the data has been successfully written), but it's the best we + * can do. + * + * For reads, we use the invalid ranges here in block map to indicate + * to the caller that the data should be zeroed (a_bpn == -1). We + * have to be careful about what ranges we return to the cluster code. + * Currently the cluster code can only handle non-rounded values for + * the EOF; it cannot handle funny sized ranges in the middle of the + * file (the main problem is that it sends down odd sized I/Os to the + * disk). Our code currently works because whilst the very first + * offset and the last offset in the invalid ranges are not aligned, + * gaps in the invalid ranges between the first and last, have to be + * aligned (because we always write page sized blocks). 
For example, + * consider this arrangement: + * + * +-------------+-----+-------+------+ + * | |XXXXX| |XXXXXX| + * +-------------+-----+-------+------+ + * a b c d + * + * This shows two invalid ranges and . Whilst a and d + * are not necessarily aligned, b and c *must* be. + * + * Zero-filling occurs in a number of ways: + * + * 1. When a read occurs and we return with a_bpn == -1. + * + * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges + * which will cause us to iterate over the ranges bringing in + * pages that are not present in the cache and zeroing them. Any + * pages that are already in the cache are left untouched. Note + * that hfs_fsync does not always flush invalid ranges. + * + * 3. When we extend a file we zero out from the old EOF to the end + * of the page. It would be nice if we didn't have to do this if + * the page wasn't present (and could defer it), but because of + * the problem described above, we have to. + * + * The invalid ranges are also used to restrict the size that we write + * out on disk: see hfs_prepare_fork_for_update. + * + * Note that invalid ranges are ignored when neither the VNODE_READ or + * the VNODE_WRITE flag is specified. This is useful for the + * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they + * just want to know whether blocks are physically allocated or not. + */ +int +hfs_vnop_blockmap(struct vnop_blockmap_args *ap) +/* + struct vnop_blockmap_args { + vnode_t a_vp; + off_t a_foffset; + size_t a_size; + daddr64_t *a_bpn; + size_t *a_run; + void *a_poff; + int a_flags; + vfs_context_t a_context; + }; +*/ +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + size_t bytesContAvail = ap->a_size; + int retval = E_NONE; + int syslocks = 0; + int lockflags = 0; + struct rl_entry *invalid_range; + enum rl_overlaptype overlaptype; + int started_tr = 0; + int tooklock = 0; + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow blockmaps to the resource fork */ + } else { + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return ENOTSUP; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow blockmap */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } + } +#endif /* HFS_COMPRESSION */ + + /* Do not allow blockmap operation on a directory */ + if (vnode_isdir(vp)) { + return (ENOTSUP); + } + + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. 
+ */ + if (ap->a_bpn == NULL) + return (0); + + hfsmp = VTOHFS(vp); + cp = VTOC(vp); + fp = VTOF(vp); + + if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) { + if (cp->c_lockowner != current_thread()) { + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + tooklock = 1; + } + + // For reads, check the invalid ranges + if (ISSET(ap->a_flags, VNODE_READ)) { + if (ap->a_foffset >= fp->ff_size) { + retval = ERANGE; + goto exit; + } + + overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, + ap->a_foffset + (off_t)bytesContAvail - 1, + &invalid_range); + switch(overlaptype) { + case RL_MATCHINGOVERLAP: + case RL_OVERLAPCONTAINSRANGE: + case RL_OVERLAPSTARTSBEFORE: + /* There's no valid block for this byte offset */ + *ap->a_bpn = (daddr64_t)-1; + /* There's no point limiting the amount to be returned + * if the invalid range that was hit extends all the way + * to the EOF (i.e. there's no valid bytes between the + * end of this range and the file's EOF): + */ + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + + retval = 0; + goto exit; + + case RL_OVERLAPISCONTAINED: + case RL_OVERLAPENDSAFTER: + /* The range of interest hits an invalid block before the end: */ + if (invalid_range->rl_start == ap->a_foffset) { + /* There's actually no valid information to be had starting here: */ + *ap->a_bpn = (daddr64_t)-1; + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + + retval = 0; + goto exit; + } else { + /* + * Sadly, the lower layers don't like us to + * return unaligned ranges, so we skip over + * any invalid ranges here that are less than + * a page: zeroing of those bits is not our + * responsibility (it's dealt with elsewhere). + */ + do { + off_t rounded_start = round_page_64(invalid_range->rl_start); + if ((off_t)bytesContAvail < rounded_start - ap->a_foffset) + break; + if (rounded_start < invalid_range->rl_end + 1) { + bytesContAvail = rounded_start - ap->a_foffset; + break; + } + } while ((invalid_range = TAILQ_NEXT(invalid_range, + rl_link))); + } + break; + + case RL_NOOVERLAP: + break; + } // switch + } + } + +#if CONFIG_PROTECT + if (cp->c_cpentry) { + const int direction = (ISSET(ap->a_flags, VNODE_WRITE) + ? VNODE_WRITE : VNODE_READ); + + cp_io_params_t io_params; + cp_io_params(hfsmp, cp->c_cpentry, + off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)), + direction, &io_params); + + if (io_params.max_len < (off_t)bytesContAvail) + bytesContAvail = io_params.max_len; + + if (io_params.phys_offset != -1) { + *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset) + / hfsmp->hfs_logical_block_size); + + retval = 0; + goto exit; + } + } +#endif + +retry: + + /* Check virtual blocks only when performing write operation */ + if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) { + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto exit; + } else { + started_tr = 1; + } + syslocks = SFL_EXTENTS | SFL_BITMAP; + + } else if (overflow_extents(fp)) { + syslocks = SFL_EXTENTS; + } + + if (syslocks) + lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK); + + /* + * Check for any delayed allocations. 
+ */ + if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) { + int64_t actbytes; + u_int32_t loanedBlocks; + + // + // Make sure we have a transaction. It's possible + // that we came in and fp->ff_unallocblocks was zero + // but during the time we blocked acquiring the extents + // btree, ff_unallocblocks became non-zero and so we + // will need to start a transaction. + // + if (started_tr == 0) { + if (syslocks) { + hfs_systemfile_unlock(hfsmp, lockflags); + syslocks = 0; + } + goto retry; + } + + /* + * Note: ExtendFileC will Release any blocks on loan and + * aquire real blocks. So we ask to extend by zero bytes + * since ExtendFileC will account for the virtual blocks. + */ + + loanedBlocks = fp->ff_unallocblocks; + retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0, + kEFAllMask | kEFNoClumpMask, &actbytes); + + if (retval) { + fp->ff_unallocblocks = loanedBlocks; + cp->c_blocks += loanedBlocks; + fp->ff_blocks += loanedBlocks; + + hfs_lock_mount (hfsmp); + hfsmp->loanedBlocks += loanedBlocks; + hfs_unlock_mount (hfsmp); + + hfs_systemfile_unlock(hfsmp, lockflags); + cp->c_flag |= C_MODIFIED; + if (started_tr) { + (void) hfs_update(vp, 0); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + + hfs_end_transaction(hfsmp); + started_tr = 0; + } + goto exit; + } + } + + retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset, + ap->a_bpn, &bytesContAvail); + if (syslocks) { + hfs_systemfile_unlock(hfsmp, lockflags); + syslocks = 0; + } + + if (retval) { + /* On write, always return error because virtual blocks, if any, + * should have been allocated in ExtendFileC(). We do not + * allocate virtual blocks on read, therefore return error + * only if no virtual blocks are allocated. Otherwise we search + * rangelist for zero-fills + */ + if ((MacToVFSError(retval) != ERANGE) || + (ap->a_flags & VNODE_WRITE) || + ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) { + goto exit; + } + + /* Validate if the start offset is within logical file size */ + if (ap->a_foffset >= fp->ff_size) { + goto exit; + } + + /* + * At this point, we have encountered a failure during + * MapFileBlockC that resulted in ERANGE, and we are not + * servicing a write, and there are borrowed blocks. + * + * However, the cluster layer will not call blockmap for + * blocks that are borrowed and in-cache. We have to assume + * that because we observed ERANGE being emitted from + * MapFileBlockC, this extent range is not valid on-disk. So + * we treat this as a mapping that needs to be zero-filled + * prior to reading. + */ + + if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail) + bytesContAvail = fp->ff_size - ap->a_foffset; + + *ap->a_bpn = (daddr64_t) -1; + retval = 0; + + goto exit; + } + +exit: + if (retval == 0) { + if (ISSET(ap->a_flags, VNODE_WRITE)) { + struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges); + + // See if we might be overlapping invalid ranges... + if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) { + /* + * Mark the file as needing an update if we think the + * on-disk EOF has changed. + */ + if (ap->a_foffset <= r->rl_start) + SET(cp->c_flag, C_MODIFIED); + + /* + * This isn't the ideal place to put this. Ideally, we + * should do something *after* we have successfully + * written to the range, but that's difficult to do + * because we cannot take locks in the callback. At + * present, the cluster code will call us with VNODE_WRITE + * set just before it's about to write the data so we know + * that data is about to be written. 
If we get an I/O + * error at this point then chances are the metadata + * update to follow will also have an I/O error so the + * risk here is small. + */ + rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1, + &fp->ff_invalidranges); + + if (!TAILQ_FIRST(&fp->ff_invalidranges)) { + cp->c_flag &= ~C_ZFWANTSYNC; + cp->c_zftimeout = 0; + } + } + } + + if (ap->a_run) + *ap->a_run = bytesContAvail; + + if (ap->a_poff) + *(int *)ap->a_poff = 0; + } + + if (started_tr) { + hfs_update(vp, TRUE); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + hfs_end_transaction(hfsmp); + started_tr = 0; + } + + if (tooklock) + hfs_unlock(cp); + + return (MacToVFSError(retval)); +} + +/* + * prepare and issue the I/O + * buf_strategy knows how to deal + * with requests that require + * fragmented I/Os + */ +int +hfs_vnop_strategy(struct vnop_strategy_args *ap) +{ + buf_t bp = ap->a_bp; + vnode_t vp = buf_vnode(bp); + int error = 0; + + /* Mark buffer as containing static data if cnode flag set */ + if (VTOC(vp)->c_flag & C_SSD_STATIC) { + buf_markstatic(bp); + } + + /* Mark buffer as containing static data if cnode flag set */ + if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) { + bufattr_markgreedymode(buf_attr(bp)); + } + + /* mark buffer as containing burst mode data if cnode flag set */ + if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) { + bufattr_markisochronous(buf_attr(bp)); + } + +#if CONFIG_PROTECT + error = cp_handle_strategy(bp); + + if (error) + return error; +#endif + + error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap); + + return error; +} + +int +do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context) +{ + register struct cnode *cp = VTOC(vp); + struct filefork *fp = VTOF(vp); + kauth_cred_t cred = vfs_context_ucred(context); + int retval; + off_t bytesToAdd; + off_t actualBytesAdded; + off_t filebytes; + u_int32_t fileblocks; + int blksize; + struct hfsmount *hfsmp; + int lockflags; + int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); + + blksize = VTOVCB(vp)->blockSize; + fileblocks = fp->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START, + (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); + + if (length < 0) + return (EINVAL); + + /* This should only happen with a corrupt filesystem */ + if ((off_t)fp->ff_size < 0) + return (EINVAL); + + if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) + return (EFBIG); + + hfsmp = VTOHFS(vp); + + retval = E_NONE; + + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + + /* + * We cannot just check if fp->ff_size == length (as an optimization) + * since there may be extra physical blocks that also need truncation. + */ +#if QUOTA + if ((retval = hfs_getinoquota(cp))) + return(retval); +#endif /* QUOTA */ + + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of ff_size is 0, length will be at least 1. + */ + if (length > (off_t)fp->ff_size) { +#if QUOTA + retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)), + cred, 0); + if (retval) + goto Err_Exit; +#endif /* QUOTA */ + /* + * If we don't have enough physical space then + * we need to extend the physical size. + */ + if (length > filebytes) { + int eflags; + u_int32_t blockHint = 0; + + /* All or nothing and don't round up to clumpsize. 
*/ + eflags = kEFAllMask | kEFNoClumpMask; + + if (cred && (suser(cred, NULL) != 0)) { + eflags |= kEFReserveMask; /* keep a reserve */ + } + + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (filebytes == 0 && + hfsmp->hfs_flags & HFS_METADATA_ZONE && + hfs_virtualmetafile(cp)) { + eflags |= kEFMetadataMask; + blockHint = hfsmp->hfs_metazone_start; + } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } + + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + /* + * Keep growing the file as long as the current EOF is + * less than the desired value. + */ + while ((length > filebytes) && (retval == E_NONE)) { + bytesToAdd = length - filebytes; + retval = MacToVFSError(ExtendFileC(VTOVCB(vp), + (FCB*)fp, + bytesToAdd, + blockHint, + eflags, + &actualBytesAdded)); + + filebytes = (off_t)fp->ff_blocks * (off_t)blksize; + if (actualBytesAdded == 0 && retval == E_NONE) { + if (length > filebytes) + length = filebytes; + break; + } + } /* endwhile */ + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (hfsmp->jnl) { + hfs_update(vp, 0); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + + hfs_end_transaction(hfsmp); + + if (retval) + goto Err_Exit; + + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, + (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); + } + + if (ISSET(flags, IO_NOZEROFILL)) { + // An optimisation for the hibernation file + if (vnode_isswap(vp)) + rl_remove_all(&fp->ff_invalidranges); + } else { + if (!vnode_issystem(vp) && retval == E_NONE) { + if (length > (off_t)fp->ff_size) { + struct timeval tv; + + /* Extending the file: time to fill out the current last page w. zeroes? */ + if (fp->ff_size & PAGE_MASK_64) { + /* There might be some valid data at the start of the (current) last page + of the file, so zero out the remainder of that page to ensure the + entire page contains valid data. */ + hfs_unlock(cp); + retval = hfs_zero_eof_page(vp, length); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + if (retval) goto Err_Exit; + } + microuptime(&tv); + rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges); + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; + } + } else { + panic("hfs_truncate: invoked on non-UBC object?!"); + }; + } + if (suppress_times == 0) { + cp->c_touch_modtime = TRUE; + } + fp->ff_size = length; + + } else { /* Shorten the size of the file */ + + // An optimisation for the hibernation file + if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) { + rl_remove_all(&fp->ff_invalidranges); + } else if ((off_t)fp->ff_size > length) { + /* Any space previously marked as invalid is now irrelevant: */ + rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); + } + + /* + * Account for any unmapped blocks. Note that the new + * file length can still end up with unmapped blocks. 
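+		 * Illustrative arithmetic (made-up numbers): with blksize = 4096
+		 * and length = 10000, finalblks = (10000 + 4095) / 4096 = 3; if
+		 * dropping the loan leaves the fork with only 2 real blocks, one
+		 * block is borrowed again below so the fork still covers the new
+		 * length.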
+ */ + if (fp->ff_unallocblocks > 0) { + u_int32_t finalblks; + u_int32_t loanedBlocks; + + hfs_lock_mount(hfsmp); + loanedBlocks = fp->ff_unallocblocks; + cp->c_blocks -= loanedBlocks; + fp->ff_blocks -= loanedBlocks; + fp->ff_unallocblocks = 0; + + hfsmp->loanedBlocks -= loanedBlocks; + + finalblks = (length + blksize - 1) / blksize; + if (finalblks > fp->ff_blocks) { + /* calculate required unmapped blocks */ + loanedBlocks = finalblks - fp->ff_blocks; + hfsmp->loanedBlocks += loanedBlocks; + + fp->ff_unallocblocks = loanedBlocks; + cp->c_blocks += loanedBlocks; + fp->ff_blocks += loanedBlocks; + } + hfs_unlock_mount (hfsmp); + } + + off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } + + if (fp->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, + FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); + + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (hfsmp->jnl) { + if (retval == 0) { + fp->ff_size = length; + } + hfs_update(vp, 0); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + hfs_end_transaction(hfsmp); + + filebytes = (off_t)fp->ff_blocks * (off_t)blksize; + if (retval) + goto Err_Exit; +#if QUOTA + /* These are bytesreleased */ + (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); +#endif /* QUOTA */ + + // + // Unlike when growing a file, we adjust the hotfile block count here + // instead of deeper down in the block allocation code because we do + // not necessarily have a vnode or "fcb" at the time we're deleting + // the file and so we wouldn't know if it was hotfile cached or not + // + hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize)); + + + /* + * Only set update flag if the logical length changes & we aren't + * suppressing modtime updates. + */ + if (((off_t)fp->ff_size != length) && (suppress_times == 0)) { + cp->c_touch_modtime = TRUE; + } + fp->ff_size = length; + } + if (cp->c_mode & (S_ISUID | S_ISGID)) { + if (!vfs_context_issuser(context)) + cp->c_mode &= ~(S_ISUID | S_ISGID); + } + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; /* status changed */ + if (suppress_times == 0) { + cp->c_touch_modtime = TRUE; /* file data was modified */ + + /* + * If we are not suppressing the modtime update, then + * update the gen count as well. + */ + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { + hfs_incr_gencount(cp); + } + } + + retval = hfs_update(vp, 0); + if (retval) { + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, + -1, -1, -1, retval, 0); + } + +Err_Exit: + + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END, + (int)length, (int)fp->ff_size, (int)filebytes, retval, 0); + + return (retval); +} + +/* + * Preparation which must be done prior to deleting the catalog record + * of a file or directory. In order to make the on-disk as safe as possible, + * we remove the catalog entry before releasing the bitmap blocks and the + * overflow extent records. However, some work must be done prior to deleting + * the catalog record. + * + * When calling this function, the cnode must exist both in memory and on-disk. + * If there are both resource fork and data fork vnodes, this function should + * be called on both. 
+ */ + +int +hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) { + + struct filefork *fp = VTOF(vp); + struct cnode *cp = VTOC(vp); +#if QUOTA + int retval = 0; +#endif /* QUOTA */ + + /* Cannot truncate an HFS directory! */ + if (vnode_isdir(vp)) { + return (EISDIR); + } + + /* + * See the comment below in hfs_truncate for why we need to call + * setsize here. Essentially we want to avoid pending IO if we + * already know that the blocks are going to be released here. + * This function is only called when totally removing all storage for a file, so + * we can take a shortcut and immediately setsize (0); + */ + ubc_setsize(vp, 0); + + /* This should only happen with a corrupt filesystem */ + if ((off_t)fp->ff_size < 0) + return (EINVAL); + + /* + * We cannot just check if fp->ff_size == length (as an optimization) + * since there may be extra physical blocks that also need truncation. + */ +#if QUOTA + if ((retval = hfs_getinoquota(cp))) { + return(retval); + } +#endif /* QUOTA */ + + /* Wipe out any invalid ranges which have yet to be backed by disk */ + rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges); + + /* + * Account for any unmapped blocks. Since we're deleting the + * entire file, we don't have to worry about just shrinking + * to a smaller number of borrowed blocks. + */ + if (fp->ff_unallocblocks > 0) { + u_int32_t loanedBlocks; + + hfs_lock_mount (hfsmp); + loanedBlocks = fp->ff_unallocblocks; + cp->c_blocks -= loanedBlocks; + fp->ff_blocks -= loanedBlocks; + fp->ff_unallocblocks = 0; + + hfsmp->loanedBlocks -= loanedBlocks; + + hfs_unlock_mount (hfsmp); + } + + return 0; +} + + +/* + * Special wrapper around calling TruncateFileC. This function is useable + * even when the catalog record does not exist any longer, making it ideal + * for use when deleting a file. The simplification here is that we know + * that we are releasing all blocks. + * + * Note that this function may be called when there is no vnode backing + * the file fork in question. We may call this from hfs_vnop_inactive + * to clear out resource fork data (and may not want to clear out the data + * fork yet). As a result, we pointer-check both sets of inputs before + * doing anything with them. + * + * The caller is responsible for saving off a copy of the filefork(s) + * embedded within the cnode prior to calling this function. The pointers + * supplied as arguments must be valid even if the cnode is no longer valid. 
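+ *
+ * Purely as an illustration of the calling convention (this sketch is not
+ * taken from an actual call site), a caller might capture the forks and
+ * then call:
+ *
+ *     struct filefork dfork = *cp->c_datafork;
+ *     struct filefork rfork = *cp->c_rsrcfork;
+ *     ... remove the catalog record ...
+ *     hfs_release_storage(hfsmp, &dfork, &rfork, cp->c_fileid);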
+ */ + +int +hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, + struct filefork *rsrcfork, u_int32_t fileid) { + + off_t filebytes; + u_int32_t fileblocks; + int blksize = 0; + int error = 0; + int lockflags; + + blksize = hfsmp->blockSize; + + /* Data Fork */ + if (datafork) { + off_t prev_filebytes; + + datafork->ff_size = 0; + + fileblocks = datafork->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + prev_filebytes = filebytes; + + /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ + + while (filebytes > 0) { + if (filebytes > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = 0; + } + + /* Start a transaction, and wipe out as many blocks as we can in this iteration */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + break; + } + + if (datafork->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(datafork)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false)); + + hfs_systemfile_unlock(hfsmp, lockflags); + } + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + + struct cnode *cp = datafork ? FTOC(datafork) : NULL; + struct vnode *vp; + vp = cp ? CTOV(cp, 0) : NULL; + hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize)); + prev_filebytes = filebytes; + + /* Finish the transaction and start over if necessary */ + hfs_end_transaction(hfsmp); + + if (error) { + break; + } + } + } + + /* Resource fork */ + if (error == 0 && rsrcfork) { + rsrcfork->ff_size = 0; + + fileblocks = rsrcfork->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + + /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ + + while (filebytes > 0) { + if (filebytes > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = 0; + } + + /* Start a transaction, and wipe out as many blocks as we can in this iteration */ + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + break; + } + + if (rsrcfork->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(rsrcfork)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false)); + + hfs_systemfile_unlock(hfsmp, lockflags); + } + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + + /* Finish the transaction and start over if necessary */ + hfs_end_transaction(hfsmp); + + if (error) { + break; + } + } + } + + return error; +} + +errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock) +{ + errno_t error; + + /* + * Call ubc_setsize to give the VM subsystem a chance to do + * whatever it needs to with existing pages before we delete + * blocks. Note that symlinks don't use the UBC so we'll + * get back ENOENT in that case. 
+ */ + if (have_cnode_lock) { + error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY); + if (error == EAGAIN) { + cnode_t *cp = VTOC(vp); + + if (cp->c_truncatelockowner != current_thread()) + hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!"); + + hfs_unlock(cp); + error = ubc_setsize_ex(vp, len, 0); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + } + } else + error = ubc_setsize_ex(vp, len, 0); + + return error == ENOENT ? 0 : error; +} + +/* + * Truncate a cnode to at most length size, freeing (or adding) the + * disk blocks. + */ +int +hfs_truncate(struct vnode *vp, off_t length, int flags, + int truncateflags, vfs_context_t context) +{ + struct filefork *fp = VTOF(vp); + off_t filebytes; + u_int32_t fileblocks; + int blksize; + errno_t error = 0; + struct cnode *cp = VTOC(vp); + hfsmount_t *hfsmp = VTOHFS(vp); + + /* Cannot truncate an HFS directory! */ + if (vnode_isdir(vp)) { + return (EISDIR); + } + /* A swap file cannot change size. */ + if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) { + return (EPERM); + } + + blksize = hfsmp->blockSize; + fileblocks = fp->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + + bool caller_has_cnode_lock = (cp->c_lockowner == current_thread()); + + error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock); + if (error) + return error; + + if (!caller_has_cnode_lock) { + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return error; + } + + if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) { + hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size); + cp->c_datafork->ff_symlinkptr = NULL; + } + + // have to loop truncating or growing files that are + // really big because otherwise transactions can get + // enormous and consume too many kernel resources. + + if (length < filebytes) { + while (filebytes > length) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); + if (error) + break; + } + } else if (length > filebytes) { + kauth_cred_t cred = vfs_context_ucred(context); + const bool keep_reserve = cred && suser(cred, NULL) != 0; + + if (hfs_freeblks(hfsmp, keep_reserve) + < howmany(length - filebytes, blksize)) { + error = ENOSPC; + } else { + while (filebytes < length) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { + filebytes += HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); + if (error) + break; + } + } + } else /* Same logical size */ { + + error = do_hfs_truncate(vp, length, flags, truncateflags, context); + } + /* Files that are changing size are not hot file candidates. */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + +#if HFS_CONFIG_KEY_ROLL + if (!error && cp->c_truncatelockowner == current_thread()) { + hfs_key_roll_check(cp, true); + } +#endif + + if (!caller_has_cnode_lock) + hfs_unlock(cp); + + // Make sure UBC's size matches up (in case we didn't completely succeed) + errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock); + if (!error) + error = err2; + + return error; +} + + +/* + * Preallocate file storage space. 
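+ * (Typically reached through VNOP_ALLOCATE, e.g. on behalf of an
+ * fcntl(F_PREALLOCATE) request.)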
+ */ +int +hfs_vnop_allocate(struct vnop_allocate_args /* { + vnode_t a_vp; + off_t a_length; + u_int32_t a_flags; + off_t *a_bytesallocated; + off_t a_offset; + vfs_context_t a_context; + } */ *ap) +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + ExtendedVCB *vcb; + off_t length = ap->a_length; + off_t startingPEOF; + off_t moreBytesRequested; + off_t actualBytesAdded; + off_t filebytes; + u_int32_t fileblocks; + int retval, retval2; + u_int32_t blockHint; + u_int32_t extendFlags; /* For call to ExtendFileC */ + struct hfsmount *hfsmp; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + int lockflags; + time_t orig_ctime; + + *(ap->a_bytesallocated) = 0; + + if (!vnode_isreg(vp)) + return (EISDIR); + if (length < (off_t)0) + return (EINVAL); + + cp = VTOC(vp); + + orig_ctime = VTOC(vp)->c_ctime; + + nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL); + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + goto Err_Exit; + } + + fp = VTOF(vp); + hfsmp = VTOHFS(vp); + vcb = VTOVCB(vp); + + fileblocks = fp->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; + + if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) { + retval = EINVAL; + goto Err_Exit; + } + + /* Fill in the flags word for the call to Extend the file */ + + extendFlags = kEFNoClumpMask; + if (ap->a_flags & ALLOCATECONTIG) + extendFlags |= kEFContigMask; + if (ap->a_flags & ALLOCATEALL) + extendFlags |= kEFAllMask; + if (cred && suser(cred, NULL) != 0) + extendFlags |= kEFReserveMask; + if (hfs_virtualmetafile(cp)) + extendFlags |= kEFMetadataMask; + + retval = E_NONE; + blockHint = 0; + startingPEOF = filebytes; + + if (ap->a_flags & ALLOCATEFROMPEOF) + length += filebytes; + else if (ap->a_flags & ALLOCATEFROMVOL) + blockHint = ap->a_offset / VTOVCB(vp)->blockSize; + + /* If no changes are necesary, then we're done */ + if (filebytes == length) + goto Std_Exit; + + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of filebytes is 0, length will be at least 1. + */ + if (length > filebytes) { + if (ISSET(extendFlags, kEFAllMask) + && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask)) + < howmany(length - filebytes, hfsmp->blockSize))) { + retval = ENOSPC; + goto Err_Exit; + } + + off_t total_bytes_added = 0, orig_request_size; + + orig_request_size = moreBytesRequested = length - filebytes; + +#if QUOTA + retval = hfs_chkdq(cp, + (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), + cred, 0); + if (retval) + goto Err_Exit; + +#endif /* QUOTA */ + /* + * Metadata zone checks. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (hfs_virtualmetafile(cp)) { + blockHint = hfsmp->hfs_metazone_start; + } else if ((blockHint >= hfsmp->hfs_metazone_start) && + (blockHint <= hfsmp->hfs_metazone_end)) { + /* + * Move blockHint outside metadata zone. 
+ */ + blockHint = hfsmp->hfs_metazone_end + 1; + } + } + + + while ((length > filebytes) && (retval == E_NONE)) { + off_t bytesRequested; + + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } + + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + + if (moreBytesRequested >= HFS_BIGFILE_SIZE) { + bytesRequested = HFS_BIGFILE_SIZE; + } else { + bytesRequested = moreBytesRequested; + } + + if (extendFlags & kEFContigMask) { + // if we're on a sparse device, this will force it to do a + // full scan to find the space needed. + hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN; + } + + retval = MacToVFSError(ExtendFileC(vcb, + (FCB*)fp, + bytesRequested, + blockHint, + extendFlags, + &actualBytesAdded)); + + if (retval == E_NONE) { + *(ap->a_bytesallocated) += actualBytesAdded; + total_bytes_added += actualBytesAdded; + moreBytesRequested -= actualBytesAdded; + if (blockHint != 0) { + blockHint += actualBytesAdded / vcb->blockSize; + } + } + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (hfsmp->jnl) { + (void) hfs_update(vp, 0); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + + hfs_end_transaction(hfsmp); + } + + + /* + * if we get an error and no changes were made then exit + * otherwise we must do the hfs_update to reflect the changes + */ + if (retval && (startingPEOF == filebytes)) + goto Err_Exit; + + /* + * Adjust actualBytesAdded to be allocation block aligned, not + * clump size aligned. + * NOTE: So what we are reporting does not affect reality + * until the file is closed, when we truncate the file to allocation + * block size. + */ + if (total_bytes_added != 0 && orig_request_size < total_bytes_added) + *(ap->a_bytesallocated) = + roundup(orig_request_size, (off_t)vcb->blockSize); + + } else { /* Shorten the size of the file */ + + /* + * N.B. At present, this code is never called. If and when we + * do start using it, it looks like there might be slightly + * strange semantics with the file size: it's possible for the + * file size to *increase* e.g. if current file size is 5, + * length is 1024 and filebytes is 4096, the file size will + * end up being 1024 bytes. This isn't necessarily a problem + * but it's not consistent with the code above which doesn't + * change the file size. 
+ */ + + retval = hfs_truncate(vp, length, 0, 0, ap->a_context); + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + + /* + * if we get an error and no changes were made then exit + * otherwise we must do the hfs_update to reflect the changes + */ + if (retval && (startingPEOF == filebytes)) goto Err_Exit; +#if QUOTA + /* These are bytesreleased */ + (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0); +#endif /* QUOTA */ + + if (fp->ff_size > filebytes) { + fp->ff_size = filebytes; + + hfs_ubc_setsize(vp, fp->ff_size, true); + } + } + +Std_Exit: + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + retval2 = hfs_update(vp, 0); + + if (retval == 0) + retval = retval2; +Err_Exit: + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + hfs_unlock(cp); + return (retval); +} + + +/* + * Pagein for HFS filesystem + */ +int +hfs_vnop_pagein(struct vnop_pagein_args *ap) +/* + struct vnop_pagein_args { + vnode_t a_vp, + upl_t a_pl, + vm_offset_t a_pl_offset, + off_t a_f_offset, + size_t a_size, + int a_flags + vfs_context_t a_context; + }; +*/ +{ + vnode_t vp; + struct cnode *cp; + struct filefork *fp; + int error = 0; + upl_t upl; + upl_page_info_t *pl; + off_t f_offset; + off_t page_needed_f_offset; + int offset; + int isize; + int upl_size; + int pg_index; + boolean_t truncate_lock_held = FALSE; + boolean_t file_converted = FALSE; + kern_return_t kret; + + vp = ap->a_vp; + cp = VTOC(vp); + fp = VTOF(vp); + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) { + /* + * If we errored here, then this means that one of two things occurred: + * 1. there was a problem with the decryption of the key. + * 2. the device is locked and we are not allowed to access this particular file. + * + * Either way, this means that we need to shut down this upl now. As long as + * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves) + * then we create a upl and immediately abort it. + */ + if (ap->a_pl == NULL) { + /* create the upl */ + ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl, + UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); + /* mark the range as needed so it doesn't immediately get discarded upon abort */ + ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1); + + /* Abort the range */ + ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + } + + + return error; + } +#endif /* CONFIG_PROTECT */ + + if (ap->a_pl != NULL) { + /* + * this can only happen for swap files now that + * we're asking for V2 paging behavior... + * so don't need to worry about decompression, or + * keeping track of blocks read or taking the truncate lock + */ + error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, + ap->a_size, (off_t)fp->ff_size, ap->a_flags); + goto pagein_done; + } + + page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset; + +retry_pagein: + /* + * take truncate lock (shared/recursive) to guard against + * zero-fill thru fsync interfering, but only for v2 + * + * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the + * lock shared and we are allowed to recurse 1 level if this thread already + * owns the lock exclusively... this can legally occur + * if we are doing a shrinking ftruncate against a file + * that is mapped private, and the pages being truncated + * do not currently exist in the cache... in that case + * we will have to page-in the missing pages in order + * to provide them to the private mapping... 
we must + * also call hfs_unlock_truncate with a postive been_recursed + * arg to indicate that if we have recursed, there is no need to drop + * the lock. Allowing this simple recursion is necessary + * in order to avoid a certain deadlock... since the ftruncate + * already holds the truncate lock exclusively, if we try + * to acquire it shared to protect the pagein path, we will + * hang this thread + * + * NOTE: The if () block below is a workaround in order to prevent a + * VM deadlock. See rdar://7853471. + * + * If we are in a forced unmount, then launchd will still have the + * dyld_shared_cache file mapped as it is trying to reboot. If we + * take the truncate lock here to service a page fault, then our + * thread could deadlock with the forced-unmount. The forced unmount + * thread will try to reclaim the dyld_shared_cache vnode, but since it's + * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount + * thread will think it needs to copy all of the data out of the file + * and into a VM copy object. If we hold the cnode lock here, then that + * VM operation will not be able to proceed, because we'll set a busy page + * before attempting to grab the lock. Note that this isn't as simple as "don't + * call ubc_setsize" because doing that would just shift the problem to the + * ubc_msync done before the vnode is reclaimed. + * + * So, if a forced unmount on this volume is in flight AND the cnode is + * marked C_DELETED, then just go ahead and do the page in without taking + * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file + * that is not going to be available on the next mount, this seems like a + * OK solution from a correctness point of view, even though it is hacky. + */ + if (vfs_isforce(vnode_mount(vp))) { + if (cp->c_flag & C_DELETED) { + /* If we don't get it, then just go ahead and operate without the lock */ + truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); + } + } + else { + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); + truncate_lock_held = TRUE; + } + + kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); + + if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) { + error = EINVAL; + goto pagein_done; + } + ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1); + + upl_size = isize = ap->a_size; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + /* + * no absent pages were found in the range specified + * just abort the UPL to get rid of it and then we're done + */ + ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); + goto pagein_done; + } + } + /* + * initialize the offset variables before we touch the UPL. + * f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on + * isize is the offset into the UPL of the last page that is present. + */ + isize = ((pg_index + 1) * PAGE_SIZE); + pg_index = 0; + offset = 0; + f_offset = ap->a_f_offset; + + while (isize) { + int xsize; + int num_of_pages; + + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_ABSENT, so it's possible + * to get back empty slots in the UPL. 
+ * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + /* + * We know that we have at least one absent page. + * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize) { + if ( !upl_page_present(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + + if (compressed) { + + if (truncate_lock_held) { + /* + * can't hold the truncate lock when calling into the decmpfs layer + * since it calls back into this layer... even though we're only + * holding the lock in shared mode, and the re-entrant path only + * takes the lock shared, we can deadlock if some other thread + * tries to grab the lock exclusively in between. + */ + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); + truncate_lock_held = FALSE; + } + ap->a_pl = upl; + ap->a_pl_offset = offset; + ap->a_f_offset = f_offset; + ap->a_size = xsize; + + error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp)); + /* + * note that decpfs_pagein_compressed can change the state of + * 'compressed'... it will set it to 0 if the file is no longer + * compressed once the compression lock is successfully taken + * i.e. we would block on that lock while the file is being inflated + */ + if (error == 0 && vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } + if (compressed) { + if (error == 0) { + /* successful page-in, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + // + // compressed files are not traditional hot file candidates + // but they may be for CF (which ignores the ff_bytesread + // field) + // + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + } else if (error == EAGAIN) { + /* + * EAGAIN indicates someone else already holds the compression lock... + * to avoid deadlocking, we'll abort this range of pages with an + * indication that the pagein needs to be redriven + */ + ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART); + } else if (error == ENOSPC) { + + if (upl_size == PAGE_SIZE) + panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n"); + + ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + ap->a_size = PAGE_SIZE; + ap->a_pl = NULL; + ap->a_pl_offset = 0; + ap->a_f_offset = page_needed_f_offset; + + goto retry_pagein; + } else { + ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + goto pagein_done; + } + goto pagein_next_range; + } + else { + /* + * Set file_converted only if the file became decompressed while we were + * paging in. If it were still compressed, we would re-start the loop using the goto + * in the above block. This avoid us overloading truncate_lock_held as our retry_pagein + * condition below, since we could have avoided taking the truncate lock to prevent + * a deadlock in the force unmount case. 
+ */ + file_converted = TRUE; + } + } + if (file_converted == TRUE) { + /* + * the file was converted back to a regular file after we first saw it as compressed + * we need to abort the upl, retake the truncate lock, recreate the UPL and start over + * reset a_size so that we consider what remains of the original request + * and null out a_upl and a_pl_offset. + * + * We should only be able to get into this block if the decmpfs_pagein_compressed + * successfully decompressed the range in question for this file. + */ + ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + ap->a_size = isize; + ap->a_pl = NULL; + ap->a_pl_offset = 0; + + /* Reset file_converted back to false so that we don't infinite-loop. */ + file_converted = FALSE; + goto retry_pagein; + } + } +#endif + error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags); + + /* + * Keep track of blocks read. + */ + if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { + int bytesread; + int took_cnode_lock = 0; + + if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE) + bytesread = fp->ff_size; + else + bytesread = xsize; + + /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ + if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + took_cnode_lock = 1; + } + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { + struct timeval tv; + + fp->ff_bytesread = bytesread; + microtime(&tv); + cp->c_atime = tv.tv_sec; + } else { + fp->ff_bytesread += bytesread; + } + cp->c_touch_acctime = TRUE; + + if (vnode_isfastdevicecandidate(vp)) { + (void) hfs_addhotfile(vp); + } + if (took_cnode_lock) + hfs_unlock(cp); + } +pagein_next_range: + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; + + error = 0; + } + +pagein_done: + if (truncate_lock_held == TRUE) { + /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */ + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); + } + + return (error); +} + +/* + * Pageout for HFS filesystem. + */ +int +hfs_vnop_pageout(struct vnop_pageout_args *ap) +/* + struct vnop_pageout_args { + vnode_t a_vp, + upl_t a_pl, + vm_offset_t a_pl_offset, + off_t a_f_offset, + size_t a_size, + int a_flags + vfs_context_t a_context; + }; +*/ +{ + vnode_t vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + int retval = 0; + off_t filesize; + upl_t upl; + upl_page_info_t* pl = NULL; + vm_offset_t a_pl_offset; + int a_flags; + int is_pageoutv2 = 0; + kern_return_t kret; + + cp = VTOC(vp); + fp = VTOF(vp); + + a_flags = ap->a_flags; + a_pl_offset = ap->a_pl_offset; + + /* + * we can tell if we're getting the new or old behavior from the UPL + */ + if ((upl = ap->a_pl) == NULL) { + int request_flags; + + is_pageoutv2 = 1; + /* + * we're in control of any UPL we commit + * make sure someone hasn't accidentally passed in UPL_NOCOMMIT + */ + a_flags &= ~UPL_NOCOMMIT; + a_pl_offset = 0; + + /* + * For V2 semantics, we want to take the cnode truncate lock + * shared to guard against the file size changing via zero-filling. + * + * However, we have to be careful because we may be invoked + * via the ubc_msync path to write out dirty mmap'd pages + * in response to a lock event on a content-protected + * filesystem (e.g. to write out class A files). 
+ * As a result, we want to take the truncate lock 'SHARED' with + * the mini-recursion locktype so that we don't deadlock/panic + * because we may be already holding the truncate lock exclusive to force any other + * IOs to have blocked behind us. + */ + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); + + if (a_flags & UPL_MSYNC) { + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + } + else { + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + } + + kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); + + if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) { + retval = EINVAL; + goto pageout_done; + } + } + /* + * from this point forward upl points at the UPL we're working with + * it was either passed in or we succesfully created it + */ + + /* + * Figure out where the file ends, for pageout purposes. If + * ff_new_size > ff_size, then we're in the middle of extending the + * file via a write, so it is safe (and necessary) that we be able + * to pageout up to that point. + */ + filesize = fp->ff_size; + if (fp->ff_new_size > filesize) + filesize = fp->ff_new_size; + + /* + * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own + * UPL instead of relying on the UPL passed into us. We go ahead and do that here, + * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for + * N dirty ranges in the UPL. Note that this is almost a direct copy of the + * logic in vnode_pageout except that we need to do it after grabbing the truncate + * lock in HFS so that we don't lock invert ourselves. + * + * Note that we can still get into this function on behalf of the default pager with + * non-V2 behavior (swapfiles). However in that case, we did not grab locks above + * since fsync and other writing threads will grab the locks, then mark the + * relevant pages as busy. But the pageout codepath marks the pages as busy, + * and THEN would attempt to grab the truncate lock, which would result in deadlock. So + * we do not try to grab anything for the pre-V2 case, which should only be accessed + * by the paging/VM system. + */ + + if (is_pageoutv2) { + off_t f_offset; + int offset; + int isize; + int pg_index; + int error; + int error_ret = 0; + + isize = ap->a_size; + f_offset = ap->a_f_offset; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + } + + /* + * initialize the offset variables before we touch the UPL. + * a_f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on. + * isize is the offset into the UPL of the last non-clean page. + */ + isize = ((pg_index + 1) * PAGE_SIZE); + + offset = 0; + pg_index = 0; + + while (isize) { + int xsize; + int num_of_pages; + + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL. 
+ * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + if ( !upl_dirty_page(pl, pg_index)) { + panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl); + } + + /* + * We know that we have at least one dirty page. + * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize) { + if ( !upl_dirty_page(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + + if ((error = cluster_pageout(vp, upl, offset, f_offset, + xsize, filesize, a_flags))) { + if (error_ret == 0) + error_ret = error; + } + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; + } + /* capture errnos bubbled out of cluster_pageout if they occurred */ + if (error_ret != 0) { + retval = error_ret; + } + } /* end block for v2 pageout behavior */ + else { + /* + * just call cluster_pageout for old pre-v2 behavior + */ + retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset, + ap->a_size, filesize, a_flags); + } + + /* + * If data was written, update the modification time of the file + * but only if it's mapped writable; we will have touched the + * modifcation time for direct writes. + */ + if (retval == 0 && (ubc_is_mapped_writable(vp) + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + // Check again with lock + bool mapped_writable = ubc_is_mapped_writable(vp); + if (mapped_writable + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) { + cp->c_touch_modtime = TRUE; + cp->c_touch_chgtime = TRUE; + + /* + * We only need to increment the generation counter if + * it's currently mapped writable because we incremented + * the counter in hfs_vnop_mnomap. + */ + if (mapped_writable) + hfs_incr_gencount(VTOC(vp)); + + /* + * If setuid or setgid bits are set and this process is + * not the superuser then clear the setuid and setgid bits + * as a precaution against tampering. + */ + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + } + } + + hfs_unlock(cp); + } + +pageout_done: + if (is_pageoutv2) { + /* + * Release the truncate lock. Note that because + * we may have taken the lock recursively by + * being invoked via ubc_msync due to lockdown, + * we should release it recursively, too. + */ + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); + } + return (retval); +} + +/* + * Intercept B-Tree node writes to unswap them if necessary. + */ +int +hfs_vnop_bwrite(struct vnop_bwrite_args *ap) +{ + int retval = 0; + register struct buf *bp = ap->a_bp; + register struct vnode *vp = buf_vnode(bp); + BlockDescriptor block; + + /* Trap B-Tree writes */ + if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || + (VTOC(vp)->c_fileid == kHFSCatalogFileID) || + (VTOC(vp)->c_fileid == kHFSAttributesFileID) || + (vp == VTOHFS(vp)->hfc_filevp)) { + + /* + * Swap and validate the node if it is in native byte order. + * This is always be true on big endian, so we always validate + * before writing here. On little endian, the node typically has + * been swapped and validated when it was written to the journal, + * so we won't do anything here. 
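+		 * The check below relies on the last two bytes of a B-tree node
+		 * holding the offset of the first record: a node still in native
+		 * order reads 0x000E (sizeof(BTNodeDescriptor)) there, whereas a
+		 * node already swapped to big endian would read byte-reversed on
+		 * a little-endian host.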
+ */ + if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { + /* Prepare the block pointer */ + block.blockHeader = bp; + block.buffer = (char *)buf_dataptr(bp); + block.blockNum = buf_lblkno(bp); + /* not found in cache ==> came from disk */ + block.blockReadFromDisk = (buf_fromcache(bp) == 0); + block.blockSize = buf_count(bp); + + /* Endian un-swap B-Tree node */ + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false); + if (retval) + panic("hfs_vnop_bwrite: about to write corrupt node!\n"); + } + } + + /* This buffer shouldn't be locked anymore but if it is clear it */ + if ((buf_flags(bp) & B_LOCKED)) { + // XXXdbg + if (VTOHFS(vp)->jnl) { + panic("hfs: CLEARING the lock bit on bp %p\n", bp); + } + buf_clearflags(bp, B_LOCKED); + } + retval = vn_bwrite (ap); + + return (retval); +} + + +int +hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks) +{ + _dk_cs_pin_t pin; + unsigned ioc; + int err; + + memset(&pin, 0, sizeof(pin)); + pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize; + pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize; + switch (pin_state) { + case HFS_PIN_IT: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA; + break; + case HFS_PIN_IT | HFS_TEMP_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN; + break; + case HFS_PIN_IT | HFS_DATALESS_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE; + break; + case HFS_UNPIN_IT: + ioc = _DKIOCCSUNPINEXTENT; + pin.cp_flags = 0; + break; + case HFS_UNPIN_IT | HFS_EVICT_PIN: + ioc = _DKIOCCSPINEXTENT; + pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA; + break; + default: + return EINVAL; + } + err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel()); + return err; +} + +// +// The cnode lock should already be held on entry to this function +// +int +hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned) +{ + struct filefork *fp = VTOF(vp); + int i, err=0, need_put=0; + struct vnode *rsrc_vp=NULL; + uint32_t npinned = 0; + off_t offset; + + if (num_blocks_pinned) { + *num_blocks_pinned = 0; + } + + if (vnode_vtype(vp) != VREG) { + /* Not allowed to pin directories or symlinks */ + printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp)); + return (EPERM); + } + + if (fp->ff_unallocblocks) { + printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks); + return (EINVAL); + } + + /* + * It is possible that if the caller unlocked/re-locked the cnode after checking + * for C_NOEXISTS|C_DELETED that the file could have been deleted while the + * cnode was unlocked. So check the condition again and return ENOENT so that + * the caller knows why we failed to pin the vnode. 
+ */ + if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) { + // makes no sense to pin something that's pending deletion + return ENOENT; + } + + if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) { + if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) { + //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid, + // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size); + + fp = VTOC(rsrc_vp)->c_rsrcfork; + need_put = 1; + } + } + if (fp->ff_blocks == 0) { + if (need_put) { + // + // use a distinct error code for a compressed file that has no resource fork; + // we return EALREADY to indicate that the data is already probably hot file + // cached because it's in an EA and the attributes btree is on the ssd + // + err = EALREADY; + } else { + err = EINVAL; + } + goto out; + } + + offset = 0; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (fp->ff_extents[i].startBlock == 0) { + break; + } + + err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount); + if (err) { + break; + } else { + npinned += fp->ff_extents[i].blockCount; + } + } + + if (err || npinned == 0) { + goto out; + } + + if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) { + uint32_t pblocks; + uint8_t forktype = 0; + + if (fp == VTOC(vp)->c_rsrcfork) { + forktype = 0xff; + } + /* + * The file could have overflow extents, better pin them. + * + * We assume that since we are holding the cnode lock for this cnode, + * the files extents cannot be manipulated, but the tree could, so we + * need to ensure that it doesn't change behind our back as we iterate it. + */ + int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); + err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks); + hfs_systemfile_unlock (hfsmp, lockflags); + + if (err) { + goto out; + } + npinned += pblocks; + } + +out: + if (num_blocks_pinned) { + *num_blocks_pinned = npinned; + } + + if (need_put && rsrc_vp) { + // + // have to unlock the cnode since it's shared between the + // resource fork vnode and the data fork vnode (and the + // vnode_put() may need to re-acquire the cnode lock to + // reclaim the resource fork vnode) + // + hfs_unlock(VTOC(vp)); + vnode_put(rsrc_vp); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + return err; +} + + +/* + * Relocate a file to a new location on disk + * cnode must be locked on entry + * + * Relocation occurs by cloning the file's data from its + * current set of blocks to a new set of blocks. During + * the relocation all of the blocks (old and new) are + * owned by the file. + * + * ----------------- + * |///////////////| + * ----------------- + * 0 N (file offset) + * + * ----------------- ----------------- + * |///////////////| | | STEP 1 (acquire new blocks) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- ----------------- + * |///////////////| |///////////////| STEP 2 (clone data) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- + * |///////////////| STEP 3 (head truncate blocks) + * ----------------- + * 0 N + * + * During steps 2 and 3 page-outs to file offsets less + * than or equal to N are suspended. + * + * During step 3 page-ins to the file get suspended. 
+ */
+int
+hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
+	struct proc *p)
+{
+	struct cnode *cp;
+	struct filefork *fp;
+	struct hfsmount *hfsmp;
+	u_int32_t headblks;
+	u_int32_t datablks;
+	u_int32_t blksize;
+	u_int32_t growsize;
+	u_int32_t nextallocsave;
+	daddr64_t sector_a, sector_b;
+	int eflags;
+	off_t newbytes;
+	int retval;
+	int lockflags = 0;
+	int took_trunc_lock = 0;
+	int started_tr = 0;
+	enum vtype vnodetype;
+
+	vnodetype = vnode_vtype(vp);
+	if (vnodetype != VREG) {
+		/* Not allowed to move symlinks. */
+		return (EPERM);
+	}
+
+	hfsmp = VTOHFS(vp);
+	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+		return (ENOSPC);
+	}
+
+	cp = VTOC(vp);
+	fp = VTOF(vp);
+	if (fp->ff_unallocblocks)
+		return (EINVAL);
+
+#if CONFIG_PROTECT
+	/*
+	 *
+	 * Disable HFS file relocation on content-protected filesystems
+	 */
+	if (cp_fs_protected (hfsmp->hfs_mp)) {
+		return EINVAL;
+	}
+#endif
+	/* If it's an SSD, also disable HFS relocation */
+	if (hfsmp->hfs_flags & HFS_SSD) {
+		return EINVAL;
+	}
+
+
+	blksize = hfsmp->blockSize;
+	if (blockHint == 0)
+		blockHint = hfsmp->nextAllocation;
+
+	if (fp->ff_size > 0x7fffffff) {
+		return (EFBIG);
+	}
+
+	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
+		hfs_unlock(cp);
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+		/* Force lock since the caller expects the lock to be held. */
+		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
+			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+			return (retval);
+		}
+		/* No need to continue if file was removed. */
+		if (cp->c_flag & C_NOEXISTS) {
+			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+			return (ENOENT);
+		}
+		took_trunc_lock = 1;
+	}
+	headblks = fp->ff_blocks;
+	datablks = howmany(fp->ff_size, blksize);
+	growsize = datablks * blksize;
+	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+	if (blockHint >= hfsmp->hfs_metazone_start &&
+	    blockHint <= hfsmp->hfs_metazone_end)
+		eflags |= kEFMetadataMask;
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		if (took_trunc_lock)
+			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+		return (EINVAL);
+	}
+	started_tr = 1;
+	/*
+	 * Protect the extents b-tree and the allocation bitmap
+	 * during MapFileBlockC and ExtendFileC operations.
+	 */
+	lockflags = SFL_BITMAP;
+	if (overflow_extents(fp))
+		lockflags |= SFL_EXTENTS;
+	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+	if (retval) {
+		retval = MacToVFSError(retval);
+		goto out;
+	}
+
+	/*
+	 * STEP 1 - acquire new allocation blocks.
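+	 * (The new blocks are appended to the same fork, so until STEP 3 the
+	 * fork temporarily owns roughly headblks + datablks allocation blocks.)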
+	 */
+	nextallocsave = hfsmp->nextAllocation;
+	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+	if (eflags & kEFMetadataMask) {
+		hfs_lock_mount(hfsmp);
+		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
+		MarkVCBDirty(hfsmp);
+		hfs_unlock_mount(hfsmp);
+	}
+
+	retval = MacToVFSError(retval);
+	if (retval == 0) {
+		cp->c_flag |= C_MODIFIED;
+		if (newbytes < growsize) {
+			retval = ENOSPC;
+			goto restore;
+		} else if (fp->ff_blocks < (headblks + datablks)) {
+			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
+			retval = ENOSPC;
+			goto restore;
+		}
+
+		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
+		if (retval) {
+			retval = MacToVFSError(retval);
+		} else if ((sector_a + 1) == sector_b) {
+			retval = ENOSPC;
+			goto restore;
+		} else if ((eflags & kEFMetadataMask) &&
+		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
+		              hfsmp->hfs_metazone_end)) {
+#if 0
+			const char * filestr;
+			char emptystr = '\0';
+
+			if (cp->c_desc.cd_nameptr != NULL) {
+				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
+			} else if (vnode_name(vp) != NULL) {
+				filestr = vnode_name(vp);
+			} else {
+				filestr = &emptystr;
+			}
+#endif
+			retval = ENOSPC;
+			goto restore;
+		}
+	}
+	/* Done with system locks and journal for now. */
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	lockflags = 0;
+	hfs_end_transaction(hfsmp);
+	started_tr = 0;
+
+	if (retval) {
+		/*
+		 * Check to see if failure is due to excessive fragmentation.
+		 */
+		if ((retval == ENOSPC) &&
+		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
+			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
+		}
+		goto out;
+	}
+	/*
+	 * STEP 2 - clone file data into the new allocation blocks.
+	 */
+
+	if (vnodetype == VLNK)
+		retval = EPERM;
+	else if (vnode_issystem(vp))
+		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+	else
+		retval = hfs_clonefile(vp, headblks, datablks, blksize);
+
+	/* Start transaction for step 3 or for a restore. */
+	if (hfs_start_transaction(hfsmp) != 0) {
+		retval = EINVAL;
+		goto out;
+	}
+	started_tr = 1;
+	if (retval)
+		goto restore;
+
+	/*
+	 * STEP 3 - switch to cloned data and remove old blocks.
+	 */
+	lockflags = SFL_BITMAP;
+	if (overflow_extents(fp))
+		lockflags |= SFL_EXTENTS;
+	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
+
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	lockflags = 0;
+	if (retval)
+		goto restore;
+out:
+	if (took_trunc_lock)
+		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		lockflags = 0;
+	}
+
+	/* Push cnode's new extent data to disk. */
+	if (retval == 0) {
+		hfs_update(vp, 0);
+	}
+	if (hfsmp->jnl) {
+		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
+			(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
+		else
+			(void) hfs_flushvolumeheader(hfsmp, 0);
+	}
+exit:
+	if (started_tr)
+		hfs_end_transaction(hfsmp);
+
+	return (retval);
+
+restore:
+	if (fp->ff_blocks == headblks) {
+		if (took_trunc_lock)
+			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
+		goto exit;
+	}
+	/*
+	 * Give back any newly allocated space.
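+	 * (Truncating the fork back to ff_size releases the blocks acquired
+	 * in STEP 1 while leaving the original data blocks in place.)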
+ */ + if (lockflags == 0) { + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + } + + (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp), + FTOC(fp)->c_fileid, false); + + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + if (took_trunc_lock) + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + goto exit; +} + + +/* + * Clone a file's data within the file. + * + */ +static int +hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) +{ + caddr_t bufp; + size_t bufsize; + size_t copysize; + size_t iosize; + size_t offset; + off_t writebase; + uio_t auio; + int error = 0; + + writebase = blkstart * blksize; + copysize = blkcnt * blksize; + iosize = bufsize = MIN(copysize, 128 * 1024); + offset = 0; + + hfs_unlock(VTOC(vp)); + +#if CONFIG_PROTECT + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + return (error); + } +#endif /* CONFIG_PROTECT */ + + bufp = hfs_malloc(bufsize); + + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + + while (offset < copysize) { + iosize = MIN(copysize - offset, iosize); + + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, (uintptr_t)bufp, iosize); + + error = cluster_read(vp, auio, copysize, IO_NOCACHE); + if (error) { + printf("hfs_clonefile: cluster_read failed - %d\n", error); + break; + } + if (uio_resid(auio) != 0) { + printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio)); + error = EIO; + break; + } + + uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, (uintptr_t)bufp, iosize); + + error = cluster_write(vp, auio, writebase + offset, + writebase + offset + iosize, + uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); + if (error) { + printf("hfs_clonefile: cluster_write failed - %d\n", error); + break; + } + if (uio_resid(auio) != 0) { + printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n"); + error = EIO; + break; + } + offset += iosize; + } + uio_free(auio); + + if ((blksize & PAGE_MASK)) { + /* + * since the copy may not have started on a PAGE + * boundary (or may not have ended on one), we + * may have pages left in the cache since NOCACHE + * will let partially written pages linger... + * lets just flush the entire range to make sure + * we don't have any pages left that are beyond + * (or intersect) the real LEOF of this file + */ + ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); + } else { + /* + * No need to call ubc_msync or hfs_invalbuf + * since the file was copied using IO_NOCACHE and + * the copy was done starting and ending on a page + * boundary in the file. + */ + } + hfs_free(bufp, bufsize); + + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + return (error); +} + +/* + * Clone a system (metadata) file. 
+ * + */ +static int +hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, + kauth_cred_t cred, struct proc *p) +{ + caddr_t bufp; + char * offset; + size_t bufsize; + size_t iosize; + struct buf *bp = NULL; + daddr64_t blkno; + daddr64_t blk; + daddr64_t start_blk; + daddr64_t last_blk; + int breadcnt; + int i; + int error = 0; + + + iosize = GetLogicalBlockSize(vp); + bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); + breadcnt = bufsize / iosize; + + bufp = hfs_malloc(bufsize); + + start_blk = ((daddr64_t)blkstart * blksize) / iosize; + last_blk = ((daddr64_t)blkcnt * blksize) / iosize; + blkno = 0; + + while (blkno < last_blk) { + /* + * Read up to a megabyte + */ + offset = bufp; + for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) { + error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp); + if (error) { + printf("hfs_clonesysfile: meta_bread error %d\n", error); + goto out; + } + if (buf_count(bp) != iosize) { + printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp)); + goto out; + } + bcopy((char *)buf_dataptr(bp), offset, iosize); + + buf_markinvalid(bp); + buf_brelse(bp); + bp = NULL; + + offset += iosize; + } + + /* + * Write up to a megabyte + */ + offset = bufp; + for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) { + bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META); + if (bp == NULL) { + printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno); + error = EIO; + goto out; + } + bcopy(offset, (char *)buf_dataptr(bp), iosize); + error = (int)buf_bwrite(bp); + bp = NULL; + if (error) + goto out; + offset += iosize; + } + } +out: + if (bp) { + buf_brelse(bp); + } + + hfs_free(bufp, bufsize); + + error = hfs_fsync(vp, MNT_WAIT, 0, p); + + return (error); +} + +errno_t hfs_flush_invalid_ranges(vnode_t vp) +{ + cnode_t *cp = VTOC(vp); + + hfs_assert(cp->c_lockowner == current_thread()); + hfs_assert(cp->c_truncatelockowner == current_thread()); + + if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout) + return 0; + + filefork_t *fp = VTOF(vp); + + /* + * We can't hold the cnode lock whilst we call cluster_write so we + * need to copy the extents into a local buffer. + */ + int max_exts = 16; + struct ext { + off_t start, end; + } exts_buf[max_exts]; // 256 bytes + struct ext *exts = exts_buf; + int ext_count = 0; + errno_t ret; + + struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges); + + while (r) { + /* If we have more than can fit in our stack buffer, switch + to a heap buffer. */ + if (exts == exts_buf && ext_count == max_exts) { + max_exts = 256; + exts = hfs_malloc(sizeof(struct ext) * max_exts); + memcpy(exts, exts_buf, ext_count * sizeof(struct ext)); + } + + struct rl_entry *next = TAILQ_NEXT(r, rl_link); + + exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end }; + + if (!next || (ext_count == max_exts && exts != exts_buf)) { + hfs_unlock(cp); + for (int i = 0; i < ext_count; ++i) { + ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1, + exts[i].start, 0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + if (ret) { + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + goto exit; + } + } + + if (!next) { + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + break; + } + + /* Push any existing clusters which should clean up our invalid + ranges as they go through hfs_vnop_blockmap. */ + cluster_push(vp, 0); + + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + + /* + * Get back to where we were (given we dropped the lock). 
+ * This shouldn't be many because we pushed above. + */ + TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) { + if (r->rl_end > exts[ext_count - 1].end) + break; + } + + ext_count = 0; + } else + r = next; + } + + ret = 0; + +exit: + + if (exts != exts_buf) + hfs_free(exts, sizeof(struct ext) * max_exts); + + return ret; +} diff --git a/core/hfs_resize.c b/core/hfs_resize.c new file mode 100644 index 0000000..8686705 --- /dev/null +++ b/core/hfs_resize.c @@ -0,0 +1,3432 @@ +/* + * Copyright (c) 2013-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include + +#include "hfs_journal.h" +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_endian.h" +#include "hfs_btreeio.h" +#include "hfs_cprotect.h" + +/* Enable/disable debugging code for live volume resizing */ +int hfs_resize_debug = 0; + +static errno_t hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, + struct HFSPlusCatalogFile *filerec, bool *overlaps); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context); +static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context); + +/* + * Extend a file system. + */ +int +hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + kauth_cred_t cred = vfs_context_ucred(context); + struct vnode *vp = NULL; + struct vnode *devvp; + struct buf *bp; + struct filefork *fp = NULL; + ExtendedVCB *vcb; + struct cat_fork forkdata; + u_int64_t oldsize; + uint32_t newblkcnt; + u_int64_t prev_phys_block_count; + u_int32_t addblks; + u_int64_t sector_count; + u_int32_t sector_size; + u_int32_t phys_sector_size; + u_int32_t overage_blocks; + daddr64_t prev_fs_alt_sector; + daddr_t bitmapblks; + int lockflags = 0; + int error; + int64_t oldBitmapSize; + + Boolean usedExtendFileC = false; + int transaction_begun = 0; + + devvp = hfsmp->hfs_devvp; + vcb = HFSTOVCB(hfsmp); + + /* + * - HFS Plus file systems only. + * - Journaling must be enabled. + * - No embedded volumes. 
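+	 *
+	 * (Editor's note, not in the original source.)  These constraints map onto
+	 * the checks immediately below: vcbSigWord == kHFSSigWord rejects plain HFS
+	 * volumes, hfsmp->jnl == NULL rejects volumes without an active journal, and
+	 * a non-zero hfsPlusIOPosOffset rejects HFS Plus volumes embedded inside an
+	 * HFS wrapper.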
+	 */
+	if ((vcb->vcbSigWord == kHFSSigWord) ||
+	    (hfsmp->jnl == NULL) ||
+	    (vcb->hfsPlusIOPosOffset != 0)) {
+		return (EPERM);
+	}
+	/*
+	 * If extending file system by non-root, then verify
+	 * ownership and check permissions.
+	 */
+	if (suser(cred, NULL)) {
+		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
+
+		if (error)
+			return (error);
+		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
+		if (error == 0) {
+			error = hfs_write_access(vp, cred, p, false);
+		}
+		hfs_unlock(VTOC(vp));
+		vnode_put(vp);
+		if (error)
+			return (error);
+
+		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
+		if (error)
+			return (error);
+	}
+	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
+		return (ENXIO);
+	}
+	if (sector_size != hfsmp->hfs_logical_block_size) {
+		return (ENXIO);
+	}
+	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
+		return (ENXIO);
+	}
+	/* Check if partition size is correct for new file system size */
+	if ((sector_size * sector_count) < newsize) {
+		printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN);
+		return (ENOSPC);
+	}
+	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
+	if (error) {
+		if ((error != ENOTSUP) && (error != ENOTTY)) {
+			return (ENXIO);
+		}
+		/* If ioctl is not supported, force physical and logical sector size to be same */
+		phys_sector_size = sector_size;
+	}
+	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
+
+	/*
+	 * Validate new size.
+	 */
+	if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
+		printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
+		return (EINVAL);
+	}
+	uint64_t cnt = newsize / vcb->blockSize;
+	if (cnt > 0xFFFFFFFF) {
+		printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize);
+		return (EOVERFLOW);
+	}
+
+	newblkcnt = (uint32_t)cnt;
+
+	addblks = newblkcnt - vcb->totalBlocks;
+
+	if (hfs_resize_debug) {
+		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
+		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, newblkcnt, addblks);
+	}
+	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
+
+	hfs_lock_mount (hfsmp);
+	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
+		hfs_unlock_mount(hfsmp);
+		error = EALREADY;
+		goto out;
+	}
+	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
+	hfs_unlock_mount (hfsmp);
+
+	/* Start with a clean journal. */
+	hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
+
+	/*
+	 * Enclose changes inside a transaction.
+	 */
+	if (hfs_start_transaction(hfsmp) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	transaction_begun = 1;
+
+
+	/* Update the hfsmp fields for the physical information about the device */
+	prev_phys_block_count = hfsmp->hfs_logical_block_count;
+	prev_fs_alt_sector = hfsmp->hfs_fs_avh_sector;
+
+	hfsmp->hfs_logical_block_count = sector_count;
+	hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
+
+	/*
+	 * It is possible that the new file system is smaller than the partition size.
+	 * Therefore, update offsets for AVH accordingly.
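+	 *
+	 * (Editor's illustration, not in the original source; numbers are made up.)
+	 * Each alternate volume header (AVH) lives 1024 bytes before the end of the
+	 * region it describes.  With 512-byte device sectors, no embedded offset and
+	 * a 1,000,000-sector device of which the grown file system will use 900,000
+	 * sectors, hfs_partition_avh_sector becomes 999,998 while hfs_fs_avh_sector
+	 * becomes 899,998.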
+ */ + if (hfs_resize_debug) { + printf ("hfs_extendfs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + + HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) + + HFS_ALT_SECTOR(sector_size, (newsize/hfsmp->hfs_logical_block_size)); + if (hfs_resize_debug) { + printf ("hfs_extendfs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + + /* + * Note: we take the attributes lock in case we have an attribute data vnode + * which needs to change size. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + vp = vcb->allocationsRefNum; + fp = VTOF(vp); + bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); + + /* + * Calculate additional space required (if any) by allocation bitmap. + */ + oldBitmapSize = fp->ff_size; + bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize; + if (bitmapblks > (daddr_t)fp->ff_blocks) + bitmapblks -= fp->ff_blocks; + else + bitmapblks = 0; + + /* + * The allocation bitmap can contain unused bits that are beyond end of + * current volume's allocation blocks. Usually they are supposed to be + * zero'ed out but there can be cases where they might be marked as used. + * After extending the file system, those bits can represent valid + * allocation blocks, so we mark all the bits from the end of current + * volume to end of allocation bitmap as "free". + * + * Figure out the number of overage blocks before proceeding though, + * so we don't add more bytes to our I/O than necessary. + * First figure out the total number of blocks representable by the + * end of the bitmap file vs. the total number of blocks in the new FS. + * Then subtract away the number of blocks in the current FS. This is how much + * we can mark as free right now without having to grow the bitmap file. + */ + overage_blocks = fp->ff_blocks * vcb->blockSize * 8; + overage_blocks = MIN (overage_blocks, newblkcnt); + overage_blocks -= vcb->totalBlocks; + + BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks); + + if (bitmapblks > 0) { + daddr64_t blkno; + daddr_t blkcnt; + off_t bytesAdded; + + /* + * Get the bitmap's current size (in allocation blocks) so we know + * where to start zero filling once the new space is added. We've + * got to do this before the bitmap is grown. + */ + blkno = (daddr64_t)fp->ff_blocks; + + /* + * Try to grow the allocation file in the normal way, using allocation + * blocks already existing in the file system. This way, we might be + * able to grow the bitmap contiguously, or at least in the metadata + * zone. + */ + error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0, + kEFAllMask | kEFNoClumpMask | kEFReserveMask + | kEFMetadataMask | kEFContigMask, &bytesAdded); + + if (error == 0) { + usedExtendFileC = true; + } else { + /* + * If the above allocation failed, fall back to allocating the new + * extent of the bitmap from the space we're going to add. Since those + * blocks don't yet belong to the file system, we have to update the + * extent list directly, and manually adjust the file size. 
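+			 *
+			 * (Editor's note, not in the original source.)  The fallback
+			 * extent starts at allocation block vcb->totalBlocks, i.e. the
+			 * first block of the region being added; those blocks only
+			 * become part of the volume when totalBlocks is increased
+			 * later in this same transaction.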
+ */ + bytesAdded = 0; + error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d adding extents\n", error); + goto out; + } + fp->ff_blocks += bitmapblks; + VTOC(vp)->c_blocks = fp->ff_blocks; + VTOC(vp)->c_flag |= C_MODIFIED; + } + + /* + * Update the allocation file's size to include the newly allocated + * blocks. Note that ExtendFileC doesn't do this, which is why this + * statement is outside the above "if" statement. + */ + fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + + /* + * Zero out the new bitmap blocks. + */ + { + + bp = NULL; + blkcnt = bitmapblks; + while (blkcnt > 0) { + error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); + if (error) { + if (bp) { + buf_brelse(bp); + } + break; + } + bzero((char *)buf_dataptr(bp), vcb->blockSize); + buf_markaged(bp); + error = (int)buf_bwrite(bp); + if (error) + break; + --blkcnt; + ++blkno; + } + } + if (error) { + printf("hfs_extendfs: error %d clearing blocks\n", error); + goto out; + } + /* + * Mark the new bitmap space as allocated. + * + * Note that ExtendFileC will have marked any blocks it allocated, so + * this is only needed if we used AddFileExtent. Also note that this + * has to come *after* the zero filling of new blocks in the case where + * we used AddFileExtent (since the part of the bitmap we're touching + * is in those newly allocated blocks). + */ + if (!usedExtendFileC) { + error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks); + if (error) { + printf("hfs_extendfs: error %d setting bitmap\n", error); + goto out; + } + vcb->freeBlocks -= bitmapblks; + } + } + + /* + * Mark the new alternate VH as allocated. + */ + if (vcb->blockSize == 512) + error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2); + else + error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1); + if (error) { + printf("hfs_extendfs: error %d setting bitmap (VH)\n", error); + goto out; + } + + /* + * Mark the old alternate VH as free. + */ + if (vcb->blockSize == 512) + (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2); + else + (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1); + + /* + * Adjust file system variables for new space. + */ + vcb->totalBlocks += addblks; + vcb->freeBlocks += addblks; + MarkVCBDirty(vcb); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + if (error) { + printf("hfs_extendfs: couldn't flush volume headers (%d)", error); + /* + * Restore to old state. + */ + if (usedExtendFileC) { + (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp), + FTOC(fp)->c_fileid, false); + } else { + fp->ff_blocks -= bitmapblks; + fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; + /* + * No need to mark the excess blocks free since those bitmap blocks + * are no longer part of the bitmap. But we do need to undo the + * effect of the "vcb->freeBlocks -= bitmapblks" above. 
+ */ + vcb->freeBlocks += bitmapblks; + } + vcb->totalBlocks -= addblks; + vcb->freeBlocks -= addblks; + hfsmp->hfs_logical_block_count = prev_phys_block_count; + hfsmp->hfs_fs_avh_sector = prev_fs_alt_sector; + /* Do not revert hfs_partition_avh_sector because the + * partition size is larger than file system size + */ + MarkVCBDirty(vcb); + if (vcb->blockSize == 512) { + if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) { + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } else { + if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) { + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + } + goto out; + } + /* + * Invalidate the old alternate volume header. We are growing the filesystem so + * this sector must be returned to the FS as free space. + */ + bp = NULL; + if (prev_fs_alt_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(prev_fs_alt_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) { + journal_modify_block_start(hfsmp->jnl, bp); + + bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize); + + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else if (bp) { + buf_brelse(bp); + } + } + + /* + * Update the metadata zone size based on current volume size + */ + hfs_metadatazone_init(hfsmp, false); + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *attr_cp; + struct filefork *attr_fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + attr_cp = VTOC(hfsmp->hfs_attrdata_vp); + attr_fp = VTOF(hfsmp->hfs_attrdata_vp); + + attr_cp->c_blocks = newblkcnt; + attr_fp->ff_blocks = newblkcnt; + attr_fp->ff_extents[0].blockCount = newblkcnt; + attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + + /* + * We only update hfsmp->allocLimit if totalBlocks actually increased. + */ + if (error == 0) { + UpdateAllocLimit(hfsmp, hfsmp->totalBlocks); + } + + /* Release all locks and sync up journal content before + * checking and extending, if required, the journal + */ + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + transaction_begun = 0; + } + + /* Increase the journal size, if required. */ + error = hfs_extend_journal(hfsmp, sector_size, sector_count, context); + if (error) { + printf ("hfs_extendfs: Could not extend journal size\n"); + goto out_noalloc; + } + + /* Log successful extending */ + printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n", + hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize)); + +out: + if (error && fp) { + /* Restore allocation fork. 
*/ + bcopy(&forkdata, &fp->ff_data, sizeof(forkdata)); + VTOC(vp)->c_blocks = fp->ff_blocks; + + } + +out_noalloc: + hfs_lock_mount (hfsmp); + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + hfs_unlock_mount (hfsmp); + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + /* Just to be sure, sync all data to the disk */ + int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL); + if (flush_error && !error) + error = flush_error; + } + if (error) { + printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); + } + + return MacToVFSError(error); +} + +#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL) + +/* + * Truncate a file system (while still mounted). + */ +int +hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) +{ + u_int64_t oldsize; + u_int32_t newblkcnt; + u_int32_t reclaimblks = 0; + int lockflags = 0; + int transaction_begun = 0; + Boolean updateFreeBlocks = false; + Boolean disable_sparse = false; + int error = 0; + + hfs_lock_mount (hfsmp); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + hfs_unlock_mount (hfsmp); + return (EALREADY); + } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + hfsmp->hfs_resize_blocksmoved = 0; + hfsmp->hfs_resize_totalblocks = 0; + hfsmp->hfs_resize_progress = 0; + hfs_unlock_mount (hfsmp); + + /* + * - Journaled HFS Plus volumes only. + * - No embedded volumes. + */ + if ((hfsmp->jnl == NULL) || + (hfsmp->hfsPlusIOPosOffset != 0)) { + error = EPERM; + goto out; + } + oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + newblkcnt = newsize / hfsmp->blockSize; + reclaimblks = hfsmp->totalBlocks - newblkcnt; + + if (hfs_resize_debug) { + printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1)); + printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks); + } + + /* Make sure new size is valid. */ + if ((newsize < HFS_MIN_SIZE) || + (newsize >= oldsize) || + (newsize % hfsmp->hfs_logical_block_size) || + (newsize % hfsmp->hfs_physical_block_size)) { + printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); + error = EINVAL; + goto out; + } + + /* + * Make sure that the file system has enough free blocks reclaim. + * + * Before resize, the disk is divided into four zones - + * A. Allocated_Stationary - These are allocated blocks that exist + * before the new end of disk. These blocks will not be + * relocated or modified during resize. + * B. Free_Stationary - These are free blocks that exist before the + * new end of disk. These blocks can be used for any new + * allocations during resize, including allocation for relocating + * data from the area of disk being reclaimed. + * C. Allocated_To-Reclaim - These are allocated blocks that exist + * beyond the new end of disk. These blocks need to be reclaimed + * during resize by allocating equal number of blocks in Free + * Stationary zone and copying the data. + * D. Free_To-Reclaim - These are free blocks that exist beyond the + * new end of disk. Nothing special needs to be done to reclaim + * them. 
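+	 *
+	 * (Editor's illustration, not in the original source; the numbers are
+	 * hypothetical.)  Suppose totalBlocks = 1000 and newblkcnt = 800, so
+	 * reclaimblks = 200.  If 150 of those 200 trailing blocks are allocated
+	 * (zone C) and 50 are free (zone D), and 120 blocks are free below the
+	 * new end of disk (zone B), then hfs_freeblks() = B + D = 170 and the
+	 * sanity check below correctly fails (200 >= 170), i.e. C (150) exceeds B (120).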
+ * + * Total number of blocks on the disk before resize: + * ------------------------------------------------ + * Total Blocks = Allocated_Stationary + Free_Stationary + + * Allocated_To-Reclaim + Free_To-Reclaim + * + * Total number of blocks that need to be reclaimed: + * ------------------------------------------------ + * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim + * + * Note that the check below also makes sure that we have enough space + * to relocate data from Allocated_To-Reclaim to Free_Stationary. + * Therefore we do not need to check total number of blocks to relocate + * later in the code. + * + * The condition below gets converted to: + * + * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim + * + * which is equivalent to: + * + * Allocated To-Reclaim >= Free Stationary + */ + if (reclaimblks >= hfs_freeblks(hfsmp, 1)) { + printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1)); + error = ENOSPC; + goto out; + } + + /* Start with a clean journal. */ + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + transaction_begun = 1; + + /* Take the bitmap lock to update the alloc limit field */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* + * Prevent new allocations from using the part we're trying to truncate. + * + * NOTE: allocLimit is set to the allocation block number where the new + * alternate volume header will be. That way there will be no files to + * interfere with allocating the new alternate volume header, and no files + * in the allocation blocks beyond (i.e. the blocks we're trying to + * truncate away. + */ + if (hfsmp->blockSize == 512) { + error = UpdateAllocLimit (hfsmp, newblkcnt - 2); + } + else { + error = UpdateAllocLimit (hfsmp, newblkcnt - 1); + } + + /* Sparse devices use first fit allocation which is not ideal + * for volume resize which requires best fit allocation. If a + * sparse device is being truncated, disable the sparse device + * property temporarily for the duration of resize. Also reset + * the free extent cache so that it is rebuilt as sorted by + * totalBlocks instead of startBlock. + * + * Note that this will affect all allocations on the volume and + * ideal fix would be just to modify resize-related allocations, + * but it will result in complexity like handling of two free + * extent caches sorted differently, etc. So we stick to this + * solution for now. + */ + hfs_lock_mount (hfsmp); + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; + ResetVCBFreeExtCache(hfsmp); + disable_sparse = true; + } + + /* + * Update the volume free block count to reflect the total number + * of free blocks that will exist after a successful resize. + * Relocation of extents will result in no net change in the total + * free space on the disk. Therefore the code that allocates + * space for new extent and deallocates the old extent explicitly + * prevents updating the volume free block count. It will also + * prevent false disk full error when the number of blocks in + * an extent being relocated is more than the free blocks that + * will exist after the volume is resized. 
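+	 *
+	 * (Editor's illustration, not in the original source; numbers are
+	 * hypothetical.)  With 10,000 free blocks and reclaimblks = 4,000,
+	 * freeBlocks drops to 6,000 here.  When a 2,500-block extent is later
+	 * relocated, its new blocks are allocated and its old blocks freed with
+	 * HFS_ALLOC_SKIPFREEBLKS, so the published free count stays at 6,000.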
+ */ + hfsmp->reclaimBlocks = reclaimblks; + hfsmp->freeBlocks -= reclaimblks; + updateFreeBlocks = true; + hfs_unlock_mount(hfsmp); + + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + } + + /* + * Update the metadata zone size to match the new volume size, + * and if it too less, metadata zone might be disabled. + */ + hfs_metadatazone_init(hfsmp, false); + + /* + * If some files have blocks at or beyond the location of the + * new alternate volume header, recalculate free blocks and + * reclaim blocks. Otherwise just update free blocks count. + * + * The current allocLimit is set to the location of new alternate + * volume header, and reclaimblks are the total number of blocks + * that need to be reclaimed. So the check below is really + * ignoring the blocks allocated for old alternate volume header. + */ + if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) { + /* + * hfs_reclaimspace will use separate transactions when + * relocating files (so we don't overwhelm the journal). + */ + hfs_end_transaction(hfsmp); + transaction_begun = 0; + + /* Attempt to reclaim some space. */ + error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context); + if (error != 0) { + printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error); + error = ENOSPC; + goto out; + } + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + transaction_begun = 1; + + /* Check if we're clear now. */ + error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks); + if (error != 0) { + printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error); + error = EAGAIN; /* tell client to try again */ + goto out; + } + } + + /* + * Note: we take the attributes lock in case we have an attribute data vnode + * which needs to change size. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* + * Allocate last 1KB for alternate volume header. + */ + error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1); + if (error) { + printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error); + goto out; + } + + /* + * Mark the old alternate volume header as free. + * We don't bother shrinking allocation bitmap file. + */ + if (hfsmp->blockSize == 512) + (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2); + else + (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1); + + /* Don't invalidate the old AltVH yet. It is still valid until the partition size is updated ! */ + + /* Log successful shrinking. */ + printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n", + hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks); + + /* + * Adjust file system variables and flush them to disk. + * + * Note that although the logical block size is updated here, it is only + * done for the benefit/convenience of the partition management software. The + * logical block count change has not yet actually been propagated to + * the disk device yet (and we won't get any notification when it does). + */ + hfsmp->totalBlocks = newblkcnt; + hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size; + hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; + hfsmp->reclaimBlocks = 0; + + /* + * At this point, a smaller HFS file system exists in a larger volume. 
+ * As per volume format, the alternate volume header is located 1024 bytes + * before end of the partition. So, until the partition is also resized, + * a valid alternate volume header will need to be updated at 1024 bytes + * before end of the volume. Under normal circumstances, a file system + * resize is always followed by a volume resize, so we also need to + * write a copy of the new alternate volume header at 1024 bytes before + * end of the new file system. + */ + if (hfs_resize_debug) { + printf ("hfs_truncatefs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + hfsmp->hfs_fs_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + /* Note hfs_partition_avh_sector stays unchanged! partition size has not yet been modified */ + if (hfs_resize_debug) { + printf ("hfs_truncatefs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + + MarkVCBDirty(hfsmp); + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + if (error) { + panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); + } + + /* + * Adjust the size of hfsmp->hfs_attrdata_vp + */ + if (hfsmp->hfs_attrdata_vp) { + struct cnode *cp; + struct filefork *fp; + + if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) { + cp = VTOC(hfsmp->hfs_attrdata_vp); + fp = VTOF(hfsmp->hfs_attrdata_vp); + + cp->c_blocks = newblkcnt; + fp->ff_blocks = newblkcnt; + fp->ff_extents[0].blockCount = newblkcnt; + fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize; + ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size); + vnode_put(hfsmp->hfs_attrdata_vp); + } + } + +out: + /* + * Update the allocLimit to acknowledge the last one or two blocks now. + * Add it to the tree as well if necessary. + */ + UpdateAllocLimit (hfsmp, hfsmp->totalBlocks); + + hfs_lock_mount (hfsmp); + if (disable_sparse == true) { + /* Now that resize is completed, set the volume to be sparse + * device again so that all further allocations will be first + * fit instead of best fit. Reset free extent cache so that + * it is rebuilt. + */ + hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; + ResetVCBFreeExtCache(hfsmp); + } + + if (error && (updateFreeBlocks == true)) { + hfsmp->freeBlocks += reclaimblks; + } + hfsmp->reclaimBlocks = 0; + + if (hfsmp->nextAllocation >= hfsmp->allocLimit) { + hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; + } + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + hfs_unlock_mount (hfsmp); + + /* On error, reset the metadata zone for original volume size */ + if (error && (updateFreeBlocks == true)) { + hfs_metadatazone_init(hfsmp, false); + } + + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + /* Just to be sure, sync all data to the disk */ + int flush_error = hfs_flush(hfsmp, HFS_FLUSH_FULL); + if (flush_error && !error) + error = flush_error; + } + + if (error) { + printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); + } + + return MacToVFSError(error); +} + + +/* + * Invalidate the physical block numbers associated with buffer cache blocks + * in the given extent of the given vnode. 
+ */ +struct hfs_inval_blk_no { + daddr64_t sectorStart; + daddr64_t sectorCount; +}; +static int +hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in) +{ + daddr64_t blkno; + struct hfs_inval_blk_no *args; + + blkno = buf_blkno(bp); + args = args_in; + + if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount) + buf_setblkno(bp, buf_lblkno(bp)); + + return BUF_RETURNED; +} +static void +hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount) +{ + struct hfs_inval_blk_no args; + args.sectorStart = sectorStart; + args.sectorCount = sectorCount; + + buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args); +} + + +/* + * Copy the contents of an extent to a new location. Also invalidates the + * physical block number of any buffer cache block in the copied extent + * (so that if the block is written, it will go through VNOP_BLOCKMAP to + * determine the new physical block number). + * + * At this point, for regular files, we hold the truncate lock exclusive + * and the cnode lock exclusive. + */ +static int +hfs_copy_extent( + struct hfsmount *hfsmp, + struct vnode *vp, /* The file whose extent is being copied. */ + u_int32_t oldStart, /* The start of the source extent. */ + u_int32_t newStart, /* The start of the destination extent. */ + u_int32_t blockCount, /* The number of allocation blocks to copy. */ + __unused vfs_context_t context) +{ + int err = 0; + size_t bufferSize; + void *buffer = NULL; + struct vfsioattr ioattr; + buf_t bp = NULL; + off_t resid; + size_t ioSize; + u_int32_t ioSizeSectors; /* Device sectors in this I/O */ + daddr64_t srcSector, destSector; + u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size; +#if CONFIG_PROTECT + int cpenabled = 0; +#endif + + /* + * Sanity check that we have locked the vnode of the file we're copying. + * + * But since hfs_systemfile_lock() doesn't actually take the lock on + * the allocation file if a journal is active, ignore the check if the + * file being copied is the allocation file. + */ + struct cnode *cp = VTOC(vp); + if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread()) + panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp); + +#if CONFIG_PROTECT + /* + * Prepare the CP blob and get it ready for use, if necessary. + * + * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs), + * because they are implicitly protected via the media key on iOS. As such, they + * must not be relocated except with the media key. So it is OK to not pass down + * a special cpentry to the IOMedia/LwVM code for handling. + */ + if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) { + cpenabled = 1; + } +#endif + + /* + * Determine the I/O size to use + * + * NOTE: Many external drives will result in an ioSize of 128KB. + * TODO: Should we use a larger buffer, doing several consecutive + * reads, then several consecutive writes? 
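+	 *
+	 * (Editor's illustration, not in the original source; numbers are
+	 * hypothetical.)  With a 128 KB bufferSize, 4 KB allocation blocks and
+	 * 512-byte device sectors, each pass below moves 32 allocation blocks
+	 * (256 sectors), so a 100-block extent is copied in four read/write
+	 * passes of 32, 32, 32 and 4 blocks.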
+ */ + vfs_ioattr(hfsmp->hfs_mp, &ioattr); + bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); + buffer = hfs_malloc(bufferSize); + + /* Get a buffer for doing the I/O */ + bp = buf_alloc(hfsmp->hfs_devvp); + buf_setdataptr(bp, (uintptr_t)buffer); + + resid = (off_t) blockCount * (off_t) hfsmp->blockSize; + srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; + while (resid > 0) { + ioSize = MIN(bufferSize, (size_t) resid); + ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; + + /* Prepare the buffer for reading */ + buf_reset(bp, B_READ); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, srcSector); + buf_setlblkno(bp, srcSector); + + /* + * Note that because this is an I/O to the device vp + * it is correct to have lblkno and blkno both point to the + * start sector being read from. If it were being issued against the + * underlying file then that would be different. + */ + + /* Attach the new CP blob to the buffer if needed */ +#if CONFIG_PROTECT + if (cpenabled) { + /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */ + cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT; + bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx); + + /* Initialize the content protection file offset to start at 0 */ + bufattr_setcpoff(buf_attr(bp), 0); + } +#endif + + /* Do the read */ + err = VNOP_STRATEGY(bp); + if (!err) + err = buf_biowait(bp); + if (err) { +#if CONFIG_PROTECT + /* Turn the flag off in error cases. */ + if (cpenabled) { + cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; + } +#endif + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err); + break; + } + + /* Prepare the buffer for writing */ + buf_reset(bp, B_WRITE); + buf_setsize(bp, ioSize); + buf_setcount(bp, ioSize); + buf_setblkno(bp, destSector); + buf_setlblkno(bp, destSector); + if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + +#if CONFIG_PROTECT + /* Attach the CP to the buffer if needed */ + if (cpenabled) { + bufattr_setcpx(buf_attr(bp), hfsmp->hfs_resize_cpx); + /* + * The last STRATEGY call may have updated the cp file offset behind our + * back, so we cannot trust it. Re-initialize the content protection + * file offset back to 0 before initiating the write portion of this I/O. + */ + bufattr_setcpoff(buf_attr(bp), 0); + } +#endif + + /* Do the write */ + vnode_startwrite(hfsmp->hfs_devvp); + err = VNOP_STRATEGY(bp); + if (!err) { + err = buf_biowait(bp); + } +#if CONFIG_PROTECT + /* Turn the flag off regardless once the strategy call finishes. */ + if (cpenabled) { + cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT; + } +#endif + if (err) { + printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err); + break; + } + + resid -= ioSize; + srcSector += ioSizeSectors; + destSector += ioSizeSectors; + } + if (bp) + buf_free(bp); + hfs_free(buffer, bufferSize); + + /* Make sure all writes have been flushed to disk. */ + if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) { + + err = hfs_flush(hfsmp, HFS_FLUSH_CACHE); + if (err) { + printf("hfs_copy_extent: hfs_flush failed (%d)\n", err); + err = 0; /* Don't fail the copy. */ + } + } + + if (!err) + hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock); + + return err; +} + + +/* Structure to store state of reclaiming extents from a + * given file. 
hfs_reclaim_file()/hfs_reclaim_xattr() + * initializes the values in this structure which are then + * used by code that reclaims and splits the extents. + */ +struct hfs_reclaim_extent_info { + struct vnode *vp; + u_int32_t fileID; + u_int8_t forkType; + u_int8_t is_dirlink; /* Extent belongs to directory hard link */ + u_int8_t is_sysfile; /* Extent belongs to system file */ + u_int8_t is_xattr; /* Extent belongs to extent-based xattr */ + u_int8_t extent_index; + int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */ + u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */ + u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */ + u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */ + struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */ + union record { + HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */ + HFSPlusAttrRecord xattr; /* Attribute record for large EAs */ + } record; + HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed. + * For catalog extent record, points to the correct + * extent information in filefork. For overflow extent + * record, or xattr record, points to extent record + * in the structure above + */ + struct cat_desc *dirlink_desc; + struct cat_attr *dirlink_attr; + struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */ + struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr() + * use it for reading and hfs_reclaim_extent()/hfs_split_extent() + * use it for writing updated extent record + */ + struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */ + u_int16_t recordlen; + int overflow_count; /* For debugging, counter for overflow extent record */ + FCB *fcb; /* Pointer to the current btree being traversed */ +}; + +/* + * Split the current extent into two extents, with first extent + * to contain given number of allocation blocks. Splitting of + * extent creates one new extent entry which can result in + * shifting of many entries through all the extent records of a + * file, and/or creating a new extent record in the overflow + * extent btree. + * + * Example: + * The diagram below represents two consecutive extent records, + * for simplicity, lets call them record X and X+1 respectively. + * Interesting extent entries have been denoted by letters. + * If the letter is unchanged before and after split, it means + * that the extent entry was not modified during the split. + * A '.' means that the entry remains unchanged after the split + * and is not relevant for our example. A '0' means that the + * extent entry is empty. + * + * If there isn't sufficient contiguous free space to relocate + * an extent (extent "C" below), we will have to break the one + * extent into multiple smaller extents, and relocate each of + * the smaller extents individually. The way we do this is by + * finding the largest contiguous free space that is currently + * available (N allocation blocks), and then convert extent "C" + * into two extents, C1 and C2, that occupy exactly the same + * allocation blocks as extent C. Extent C1 is the first + * N allocation blocks of extent C, and extent C2 is the remainder + * of extent C. 
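+ * (Editor's illustration, not in the original source; numbers are
+ * hypothetical.)  If extent C is (startBlock 5000, blockCount 100) and the
+ * largest contiguous free run is N = 60 blocks, the split produces
+ * C1 = (5000, 60) and C2 = (5060, 40), covering exactly the same blocks.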
Then we can relocate extent C1 since we know + * we have enough contiguous free space to relocate it in its + * entirety. We then repeat the process starting with extent C2. + * + * In record X, only the entries following entry C are shifted, and + * the original entry C is replaced with two entries C1 and C2 which + * are actually two extent entries for contiguous allocation blocks. + * + * Note that the entry E from record X is shifted into record X+1 as + * the new first entry. Since the first entry of record X+1 is updated, + * the FABN will also get updated with the blockCount of entry E. + * This also results in shifting of all extent entries in record X+1. + * Note that the number of empty entries after the split has been + * changed from 3 to 2. + * + * Before: + * record X record X+1 + * ---------------------===--------- --------------------------------- + * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 | + * ---------------------===--------- --------------------------------- + * + * After: + * ---------------------=======----- --------------------------------- + * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 | + * ---------------------=======----- --------------------------------- + * + * C1.startBlock = C.startBlock + * C1.blockCount = N + * + * C2.startBlock = C.startBlock + N + * C2.blockCount = C.blockCount - N + * + * FABN = old FABN - E.blockCount + * + * Inputs: + * extent_info - This is the structure that contains state about + * the current file, extent, and extent record that + * is being relocated. This structure is shared + * among code that traverses through all the extents + * of the file, code that relocates extents, and + * code that splits the extent. + * newBlockCount - The blockCount of the extent to be split after + * successfully split operation. + * Output: + * Zero on success, non-zero on failure. + */ +static int +hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount) +{ + int error = 0; + int index = extent_info->extent_index; + int i; + HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */ + HFSPlusExtentDescriptor last_extent; + HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */ + HFSPlusExtentRecord *extents_rec = NULL; + HFSPlusExtentKey *extents_key = NULL; + HFSPlusAttrRecord *xattr_rec = NULL; + HFSPlusAttrKey *xattr_key = NULL; + struct BTreeIterator iterator; + struct FSBufferDescriptor btdata; + uint16_t reclen; + uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */ + uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */ + Boolean create_record = false; + Boolean is_xattr; + struct cnode *cp; + + is_xattr = extent_info->is_xattr; + extents = extent_info->extents; + cp = VTOC(extent_info->vp); + + if (newBlockCount == 0) { + if (hfs_resize_debug) { + printf ("hfs_split_extent: No splitting required for newBlockCount=0\n"); + } + return error; + } + + if (hfs_resize_debug) { + printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount); + } + + /* Extents overflow btree can not have more than 8 extents. + * No split allowed if the 8th extent is already used. 
+ */ + if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) { + printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n"); + error = ENOSPC; + goto out; + } + + /* Determine the starting allocation block number for the following + * overflow extent record, if any, before the current record + * gets modified. + */ + read_recStartBlock = extent_info->recStartBlock; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extents[i].blockCount == 0) { + break; + } + read_recStartBlock += extents[i].blockCount; + } + + /* Shift and split */ + if (index == kHFSPlusExtentDensity-1) { + /* The new extent created after split will go into following overflow extent record */ + shift_extent.startBlock = extents[index].startBlock + newBlockCount; + shift_extent.blockCount = extents[index].blockCount - newBlockCount; + + /* Last extent in the record will be split, so nothing to shift */ + } else { + /* Splitting of extents can result in at most of one + * extent entry to be shifted into following overflow extent + * record. So, store the last extent entry for later. + */ + shift_extent = extents[kHFSPlusExtentDensity-1]; + if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) { + printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount); + } + + /* Start shifting extent information from the end of the extent + * record to the index where we want to insert the new extent. + * Note that kHFSPlusExtentDensity-1 is already saved above, and + * does not need to be shifted. The extent entry that is being + * split does not get shifted. + */ + for (i = kHFSPlusExtentDensity-2; i > index; i--) { + if (hfs_resize_debug) { + if (extents[i].blockCount) { + printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount); + } + } + extents[i+1] = extents[i]; + } + } + + if (index == kHFSPlusExtentDensity-1) { + /* The second half of the extent being split will be the overflow + * entry that will go into following overflow extent record. The + * value has been stored in 'shift_extent' above, so there is + * nothing to be done here. + */ + } else { + /* Update the values in the second half of the extent being split + * before updating the first half of the split. Note that the + * extent to split or first half of the split is at index 'index' + * and a new extent or second half of the split will be inserted at + * 'index+1' or into following overflow extent record. + */ + extents[index+1].startBlock = extents[index].startBlock + newBlockCount; + extents[index+1].blockCount = extents[index].blockCount - newBlockCount; + } + /* Update the extent being split, only the block count will change */ + extents[index].blockCount = newBlockCount; + + if (hfs_resize_debug) { + printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount); + if (index != kHFSPlusExtentDensity-1) { + printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount); + } else { + printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount); + } + } + + /* Write out information about the newly split extent to the disk */ + if (extent_info->catalog_fp) { + /* (extent_info->catalog_fp != NULL) means the newly split + * extent exists in the catalog record. This means that + * the cnode was updated. 
Therefore, to write out the changes, + * mark the cnode as modified. We cannot call hfs_update() + * in this function because the caller hfs_reclaim_extent() + * is holding the catalog lock currently. + */ + cp->c_flag |= C_MODIFIED; + } else { + /* The newly split extent is for large EAs or is in overflow + * extent record, so update it directly in the btree using the + * iterator information from the shared extent_info structure + */ + error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), extent_info->recordlen); + if (error) { + printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error); + goto out; + } + } + + /* No extent entry to be shifted into another extent overflow record */ + if (shift_extent.blockCount == 0) { + if (hfs_resize_debug) { + printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n"); + } + error = 0; + goto out; + } + + /* The overflow extent entry has to be shifted into an extent + * overflow record. This means that we might have to shift + * extent entries from all subsequent overflow records by one. + * We start iteration from the first record to the last record, + * and shift the extent entry from one record to another. + * We might have to create a new extent record for the last + * extent entry for the file. + */ + + /* Initialize iterator to search the next record */ + bzero(&iterator, sizeof(iterator)); + if (is_xattr) { + /* Copy the key from the iterator that was used to update the modified attribute record. */ + xattr_key = (HFSPlusAttrKey *)&(iterator.key); + bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey)); + /* Note: xattr_key->startBlock will be initialized later in the iteration loop */ + + xattr_rec = hfs_malloc(sizeof(*xattr_rec)); + + btdata.bufferAddress = xattr_rec; + btdata.itemSize = sizeof(HFSPlusAttrRecord); + btdata.itemCount = 1; + extents = xattr_rec->overflowExtents.extents; + } else { + /* Initialize the extent key for the current file */ + extents_key = (HFSPlusExtentKey *) &(iterator.key); + extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; + extents_key->forkType = extent_info->forkType; + extents_key->fileID = extent_info->fileID; + /* Note: extents_key->startBlock will be initialized later in the iteration loop */ + + extents_rec = hfs_malloc(sizeof(*extents_rec)); + + btdata.bufferAddress = extents_rec; + btdata.itemSize = sizeof(HFSPlusExtentRecord); + btdata.itemCount = 1; + extents = extents_rec[0]; + } + + /* The overflow extent entry has to be shifted into an extent + * overflow record. This means that we might have to shift + * extent entries from all subsequent overflow records by one. + * We start iteration from the first record to the last record, + * examine one extent record in each iteration and shift one + * extent entry from one record to another. We might have to + * create a new extent record for the last extent entry for the + * file. + * + * If shift_extent.blockCount is non-zero, it means that there is + * an extent entry that needs to be shifted into the next + * overflow extent record. We keep on going till there are no such + * entries left to be shifted. This will also change the starting + * allocation block number of the extent record which is part of + * the key for the extent record in each iteration. 
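+	 * (Editor's illustration, not in the original source; numbers are
+	 * hypothetical.)  If the next overflow record's key currently has
+	 * startBlock = 800 and the entry being shifted into it covers 50
+	 * blocks, the record is re-inserted with write_recStartBlock =
+	 * 800 - 50 = 750.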
Note that + * because the extent record key is changing while we are searching, + * the record can not be updated directly, instead it has to be + * deleted and inserted again. + */ + while (shift_extent.blockCount) { + if (hfs_resize_debug) { + printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock); + } + + /* Search if there is any existing overflow extent record + * that matches the current file and the logical start block + * number. + * + * For this, the logical start block number in the key is + * the value calculated based on the logical start block + * number of the current extent record and the total number + * of blocks existing in the current extent record. + */ + if (is_xattr) { + xattr_key->startBlock = read_recStartBlock; + } else { + extents_key->startBlock = read_recStartBlock; + } + error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator); + if (error) { + if (error != btNotFound) { + printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); + goto out; + } + /* No matching record was found, so create a new extent record. + * Note: Since no record was found, we can't rely on the + * btree key in the iterator any longer. This will be initialized + * later before we insert the record. + */ + create_record = true; + } + + /* The extra extent entry from the previous record is being inserted + * as the first entry in the current extent record. This will change + * the file allocation block number (FABN) of the current extent + * record, which is the startBlock value from the extent record key. + * Since one extra entry is being inserted in the record, the new + * FABN for the record will less than old FABN by the number of blocks + * in the new extent entry being inserted at the start. We have to + * do this before we update read_recStartBlock to point at the + * startBlock of the following record. + */ + write_recStartBlock = read_recStartBlock - shift_extent.blockCount; + if (hfs_resize_debug) { + if (create_record) { + printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock); + } + } + + /* Now update the read_recStartBlock to account for total number + * of blocks in this extent record. It will now point to the + * starting allocation block number for the next extent record. + */ + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extents[i].blockCount == 0) { + break; + } + read_recStartBlock += extents[i].blockCount; + } + + if (create_record == true) { + /* Initialize new record content with only one extent entry */ + bzero(extents, sizeof(HFSPlusExtentRecord)); + /* The new record will contain only one extent entry */ + extents[0] = shift_extent; + /* There are no more overflow extents to be shifted */ + shift_extent.startBlock = shift_extent.blockCount = 0; + + if (is_xattr) { + /* BTSearchRecord above returned btNotFound, + * but since the attribute btree is never empty + * if we are trying to insert new overflow + * record for the xattrs, the extents_key will + * contain correct data. So we don't need to + * re-initialize it again like below. 
+ */ + + /* Initialize the new xattr record */ + xattr_rec->recordType = kHFSPlusAttrExtents; + xattr_rec->overflowExtents.reserved = 0; + reclen = sizeof(HFSPlusAttrExtents); + } else { + /* BTSearchRecord above returned btNotFound, + * which means that extents_key content might + * not correspond to the record that we are + * trying to create, especially when the extents + * overflow btree is empty. So we reinitialize + * the extents_key again always. + */ + extents_key->keyLength = kHFSPlusExtentKeyMaximumLength; + extents_key->forkType = extent_info->forkType; + extents_key->fileID = extent_info->fileID; + + /* Initialize the new extent record */ + reclen = sizeof(HFSPlusExtentRecord); + } + } else { + /* The overflow extent entry from previous record will be + * the first entry in this extent record. If the last + * extent entry in this record is valid, it will be shifted + * into the following extent record as its first entry. So + * save the last entry before shifting entries in current + * record. + */ + last_extent = extents[kHFSPlusExtentDensity-1]; + + /* Shift all entries by one index towards the end */ + for (i = kHFSPlusExtentDensity-2; i >= 0; i--) { + extents[i+1] = extents[i]; + } + + /* Overflow extent entry saved from previous record + * is now the first entry in the current record. + */ + extents[0] = shift_extent; + + if (hfs_resize_debug) { + printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock); + } + + /* The last entry from current record will be the + * overflow entry which will be the first entry for + * the following extent record. + */ + shift_extent = last_extent; + + /* Since the key->startBlock is being changed for this record, + * it should be deleted and inserted with the new key. + */ + error = BTDeleteRecord(extent_info->fcb, &iterator); + if (error) { + printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error); + goto out; + } + if (hfs_resize_debug) { + printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock)); + } + } + + /* Insert the newly created or modified extent record */ + bzero(&iterator.hint, sizeof(iterator.hint)); + if (is_xattr) { + xattr_key->startBlock = write_recStartBlock; + } else { + extents_key->startBlock = write_recStartBlock; + } + error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen); + if (error) { + printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error); + goto out; + } + if (hfs_resize_debug) { + printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock); + } + } + +out: + /* + * Extents overflow btree or attributes btree headers might have + * been modified during the split/shift operation, so flush the + * changes to the disk while we are inside journal transaction. + * We should only be able to generate I/O that modifies the B-Tree + * header nodes while we're in the middle of a journal transaction. + * Otherwise it might result in panic during unmount. + */ + BTFlushPath(extent_info->fcb); + + hfs_free(extents_rec, sizeof(*extents_rec)); + hfs_free(xattr_rec, sizeof(*xattr_rec)); + return error; +} + + +/* + * Relocate an extent if it lies beyond the expected end of volume. + * + * This function is called for every extent of the file being relocated. 
+ * It allocates space for relocation, copies the data, deallocates + * the old extent, and update corresponding on-disk extent. If the function + * does not find contiguous space to relocate an extent, it splits the + * extent in smaller size to be able to relocate it out of the area of + * disk being reclaimed. As an optimization, if an extent lies partially + * in the area of the disk being reclaimed, it is split so that we only + * have to relocate the area that was overlapping with the area of disk + * being reclaimed. + * + * Note that every extent is relocated in its own transaction so that + * they do not overwhelm the journal. This function handles the extent + * record that exists in the catalog record, extent record from overflow + * extents btree, and extents for large EAs. + * + * Inputs: + * extent_info - This is the structure that contains state about + * the current file, extent, and extent record that + * is being relocated. This structure is shared + * among code that traverses through all the extents + * of the file, code that relocates extents, and + * code that splits the extent. + */ +static int +hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context) +{ + int error = 0; + int index; + struct cnode *cp; + u_int32_t oldStartBlock; + u_int32_t oldBlockCount; + u_int32_t newStartBlock = 0; + u_int32_t newBlockCount; + u_int32_t roundedBlockCount; + uint16_t node_size; + uint32_t remainder_blocks; + u_int32_t alloc_flags; + int blocks_allocated = false; + + index = extent_info->extent_index; + cp = VTOC(extent_info->vp); + + oldStartBlock = extent_info->extents[index].startBlock; + oldBlockCount = extent_info->extents[index].blockCount; + + if (0 && hfs_resize_debug) { + printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount); + } + + /* If the current extent lies completely within allocLimit, + * it does not require any relocation. + */ + if ((oldStartBlock + oldBlockCount) <= allocLimit) { + extent_info->cur_blockCount += oldBlockCount; + return error; + } + + /* Every extent should be relocated in its own transaction + * to make sure that we don't overflow the journal buffer. + */ + error = hfs_start_transaction(hfsmp); + if (error) { + return error; + } + extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK); + + /* Check if the extent lies partially in the area to reclaim, + * i.e. it starts before allocLimit and ends beyond allocLimit. + * We have already skipped extents that lie completely within + * allocLimit in the check above, so we only check for the + * startBlock. If it lies partially, split it so that we + * only relocate part of the extent. + */ + if (oldStartBlock < allocLimit) { + newBlockCount = allocLimit - oldStartBlock; + + if (hfs_resize_debug) { + int idx = extent_info->extent_index; + printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); + } + + /* If the extent belongs to a btree, check and trim + * it to be multiple of the node size. + */ + if (extent_info->is_sysfile) { + node_size = get_btree_nodesize(extent_info->vp); + /* If the btree node size is less than the block size, + * splitting this extent will not split a node across + * different extents. 
So we only check and trim if + * node size is more than the allocation block size. + */ + if (node_size > hfsmp->blockSize) { + remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); + if (remainder_blocks) { + newBlockCount -= remainder_blocks; + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); + } + } + } + /* The newBlockCount is zero because of rounding-down so that + * btree nodes are not split across extents. Therefore this + * straddling extent across resize-boundary does not require + * splitting. Skip over to relocating of complete extent. + */ + if (newBlockCount == 0) { + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n"); + } + goto relocate_full_extent; + } + } + + /* Split the extents into two parts --- the first extent lies + * completely within allocLimit and therefore does not require + * relocation. The second extent will require relocation which + * will be handled when the caller calls this function again + * for the next extent. + */ + error = hfs_split_extent(extent_info, newBlockCount); + if (error == 0) { + /* Split success, no relocation required */ + goto out; + } + /* Split failed, so try to relocate entire extent */ + if (hfs_resize_debug) { + int idx = extent_info->extent_index; + printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount); + } + } + +relocate_full_extent: + /* At this point, the current extent requires relocation. + * We will try to allocate space equal to the size of the extent + * being relocated first to try to relocate it without splitting. + * If the allocation fails, we will try to allocate contiguous + * blocks out of metadata zone. If that allocation also fails, + * then we will take a whatever contiguous block run is returned + * by the allocation, split the extent into two parts, and then + * relocate the first splitted extent. + */ + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (extent_info->is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; + } + + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, + &newStartBlock, &newBlockCount); + if ((extent_info->is_sysfile == false) && + ((error == dskFulErr) || (error == ENOSPC))) { + /* For non-system files, try reallocating space in metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + alloc_flags, &newStartBlock, &newBlockCount); + } + if ((error == dskFulErr) || (error == ENOSPC)) { + /* + * We did not find desired contiguous space for this + * extent, when we asked for it, including the metazone allocations. + * At this point we are not worrying about getting contiguity anymore. + * + * HOWEVER, if we now allow blocks to be used which were recently + * de-allocated, we may find a contiguous range (though this seems + * unlikely). As a result, assume that we will have to split the + * current extent into two pieces, but if we are able to satisfy + * the request with a single extent, detect that as well. 
+ */ + alloc_flags &= ~HFS_ALLOC_FORCECONTIG; + alloc_flags |= HFS_ALLOC_FLUSHTXN; + + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + + /* + * Allowing recently deleted extents may now allow us to find + * a single contiguous extent in the amount & size desired. If so, + * do NOT split this extent into two pieces. This is technically a + * check for "< oldBlockCount", but we use != to highlight the point + * that the special case is when they're equal. The allocator should + * never vend back more blocks than were requested. + */ + if (newBlockCount != oldBlockCount) { + blocks_allocated = true; + + /* The number of blocks allocated is less than the requested + * number of blocks. For btree extents, check and trim the + * extent to be multiple of the node size. + */ + if (extent_info->is_sysfile) { + node_size = get_btree_nodesize(extent_info->vp); + if (node_size > hfsmp->blockSize) { + remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize); + if (remainder_blocks) { + roundedBlockCount = newBlockCount - remainder_blocks; + /* Free tail-end blocks of the newly allocated extent */ + BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount, + newBlockCount - roundedBlockCount, + HFS_ALLOC_SKIPFREEBLKS); + newBlockCount = roundedBlockCount; + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount); + } + if (newBlockCount == 0) { + printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID); + error = ENOSPC; + goto out; + } + } + } + } + + /* The number of blocks allocated is less than the number of + * blocks requested, so split this extent --- the first extent + * will be relocated as part of this function call and the caller + * will handle relocating the second extent by calling this + * function again for the second extent. 
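+ * For example (illustrative numbers, not from the original comment): if the
+ * old extent spans 100 blocks but only 40 contiguous blocks could be
+ * allocated, hfs_split_extent() below splits it at 40 blocks; the first 40
+ * are relocated in this call and the remaining 60 are picked up on the
+ * caller's next pass over the (now updated) extent record.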
+ */ + error = hfs_split_extent(extent_info, newBlockCount); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + oldBlockCount = newBlockCount; + } /* end oldBlockCount != newBlockCount */ + } /* end allocation request for any available free space */ + + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + blocks_allocated = true; + + /* Copy data from old location to new location */ + error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock, + newStartBlock, newBlockCount, context); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error); + goto out; + } + + /* Update the extent record with the new start block information */ + extent_info->extents[index].startBlock = newStartBlock; + + /* Sync the content back to the disk */ + if (extent_info->catalog_fp) { + /* Update the extents in catalog record */ + if (extent_info->is_dirlink) { + error = cat_update_dirlink(hfsmp, extent_info->forkType, + extent_info->dirlink_desc, extent_info->dirlink_attr, + &(extent_info->dirlink_fork->ff_data)); + } else { + cp->c_flag |= C_MODIFIED; + /* If this is a system file, sync volume headers on disk */ + if (extent_info->is_sysfile) { + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + } + } + } else { + /* Replace record for extents overflow or extents-based xattrs */ + error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), extent_info->recordlen); + } + if (error) { + printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error); + goto out; + } + + /* Deallocate the old extent */ + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error); + goto out; + } + extent_info->blocks_relocated += newBlockCount; + + if (hfs_resize_debug) { + printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } + +out: + if (error != 0) { + if (blocks_allocated == true) { + BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); + } + } else { + /* On success, increment the total allocation blocks processed */ + extent_info->cur_blockCount += newBlockCount; + } + + hfs_systemfile_unlock(hfsmp, extent_info->lockflags); + + /* For a non-system file, if an extent entry from catalog record + * was modified, sync the in-memory changes to the catalog record + * on disk before ending the transaction. 
+ */ + if ((extent_info->catalog_fp) && + (extent_info->is_sysfile == false)) { + hfs_update(extent_info->vp, 0); + } + + hfs_end_transaction(hfsmp); + + return error; +} + +/* Report intermediate progress during volume resize */ +static void +hfs_truncatefs_progress(struct hfsmount *hfsmp) +{ + u_int32_t cur_progress = 0; + + hfs_resize_progress(hfsmp, &cur_progress); + if (cur_progress > (hfsmp->hfs_resize_progress + 9)) { + printf("hfs_truncatefs: %d%% done...\n", cur_progress); + hfsmp->hfs_resize_progress = cur_progress; + } + return; +} + +/* + * Reclaim space at the end of a volume for given file and forktype. + * + * This routine attempts to move any extent which contains allocation blocks + * at or after "allocLimit." A separate transaction is used for every extent + * that needs to be moved. If there is not contiguous space available for + * moving an extent, it can be split into smaller extents. The contents of + * any moved extents are read and written via the volume's device vnode -- + * NOT via "vp." During the move, moved blocks which are part of a transaction + * have their physical block numbers invalidated so they will eventually be + * written to their new locations. + * + * This function is also called for directory hard links. Directory hard links + * are regular files with no data fork and resource fork that contains alias + * information for backward compatibility with pre-Leopard systems. However + * non-Mac OS X implementation can add/modify data fork or resource fork + * information to directory hard links, so we check, and if required, relocate + * both data fork and resource fork. + * + * Inputs: + * hfsmp The volume being resized. + * vp The vnode for the system file. + * fileID ID of the catalog record that needs to be relocated + * forktype The type of fork that needs relocated, + * kHFSResourceForkType for resource fork, + * kHFSDataForkType for data fork + * allocLimit Allocation limit for the new volume size, + * do not use this block or beyond. All extents + * that use this block or any blocks beyond this limit + * will be relocated. + * + * Side Effects: + * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation + * blocks that were relocated. + */ +static int +hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, + u_int8_t forktype, u_long allocLimit, vfs_context_t context) +{ + int error = 0; + struct hfs_reclaim_extent_info *extent_info; + int i; + int lockflags = 0; + struct cnode *cp; + struct filefork *fp; + int took_truncate_lock = false; + int release_desc = false; + HFSPlusExtentKey *key; + + /* If there is no vnode for this file, then there's nothing to do. 
*/ + if (vp == NULL) { + return 0; + } + + cp = VTOC(vp); + + if (hfs_resize_debug) { + const char *filename = (const char *) cp->c_desc.cd_nameptr; + int namelen = cp->c_desc.cd_namelen; + + if (filename == NULL) { + filename = ""; + namelen = 0; + } + printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename); + } + + extent_info = hfs_mallocz(sizeof(struct hfs_reclaim_extent_info)); + + extent_info->vp = vp; + extent_info->fileID = fileID; + extent_info->forkType = forktype; + extent_info->is_sysfile = vnode_issystem(vp); + if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) { + extent_info->is_dirlink = true; + } + /* We always need allocation bitmap and extent btree lock */ + lockflags = SFL_BITMAP | SFL_EXTENTS; + if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) { + lockflags |= SFL_CATALOG; + } else if (fileID == kHFSAttributesFileID) { + lockflags |= SFL_ATTRIBUTE; + } else if (fileID == kHFSStartupFileID) { + lockflags |= SFL_STARTUP; + } + extent_info->lockflags = lockflags; + extent_info->fcb = VTOF(hfsmp->hfs_extents_vp); + + /* Flush data associated with current file on disk. + * + * If the current vnode is directory hard link, no flushing of + * journal or vnode is required. The current kernel does not + * modify data/resource fork of directory hard links, so nothing + * will be in the cache. If a directory hard link is newly created, + * the resource fork data is written directly using devvp and + * the code that actually relocates data (hfs_copy_extent()) also + * uses devvp for its I/O --- so they will see a consistent copy. + */ + if (extent_info->is_sysfile) { + /* If the current vnode is system vnode, flush journal + * to make sure that all data is written to the disk. + */ + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + if (error) { + printf ("hfs_reclaim_file: journal_flush returned %d\n", error); + goto out; + } + } else if (extent_info->is_dirlink == false) { + /* Flush all blocks associated with this regular file vnode. + * Normally there should not be buffer cache blocks for regular + * files, but for objects like symlinks, we can have buffer cache + * blocks associated with the vnode. Therefore we call + * buf_flushdirtyblks() also. + */ + buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file"); + + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = true; + (void) cluster_push(vp, 0); + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + if (error) { + goto out; + } + + /* If the file no longer exists, nothing left to do */ + if (cp->c_flag & C_NOEXISTS) { + error = 0; + goto out; + } + + /* Wait for any in-progress writes to this vnode to complete, so that we'll + * be copying consistent bits. (Otherwise, it's possible that an async + * write will complete to the old extent after we read from it. That + * could lead to corruption.) + */ + error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file"); + if (error) { + goto out; + } + } + + if (hfs_resize_debug) { + printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? 
"dirlink" : "file"), fileID); + } + + if (extent_info->is_dirlink) { + extent_info->dirlink_desc = hfs_malloc(sizeof(struct cat_desc)); + extent_info->dirlink_attr = hfs_malloc(sizeof(struct cat_attr)); + extent_info->dirlink_fork = hfs_mallocz(sizeof(struct filefork)); + + /* Lookup catalog record for directory hard link and + * create a fake filefork for the value looked up from + * the disk. + */ + fp = extent_info->dirlink_fork; + extent_info->dirlink_fork->ff_cp = cp; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = cat_lookup_dirlink(hfsmp, fileID, forktype, + extent_info->dirlink_desc, extent_info->dirlink_attr, + &(extent_info->dirlink_fork->ff_data)); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error); + goto out; + } + release_desc = true; + } else { + fp = VTOF(vp); + } + + extent_info->catalog_fp = fp; + extent_info->recStartBlock = 0; + extent_info->extents = extent_info->catalog_fp->ff_extents; + /* Relocate extents from the catalog record */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (fp->ff_extents[i].blockCount == 0) { + break; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error); + goto out; + } + } + + /* If the number of allocation blocks processed for reclaiming + * are less than total number of blocks for the file, continuing + * working on overflow extents record. + */ + if (fp->ff_blocks <= extent_info->cur_blockCount) { + if (0 && hfs_resize_debug) { + printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); + } + goto out; + } + + if (hfs_resize_debug) { + printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount); + } + + extent_info->iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + key = (HFSPlusExtentKey *) &(extent_info->iterator->key); + key->keyLength = kHFSPlusExtentKeyMaximumLength; + key->forkType = forktype; + key->fileID = fileID; + key->startBlock = extent_info->cur_blockCount; + + extent_info->btdata.bufferAddress = extent_info->record.overflow; + extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord); + extent_info->btdata.itemCount = 1; + + extent_info->catalog_fp = NULL; + + /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */ + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = BTSearchRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), &(extent_info->recordlen), + extent_info->iterator); + hfs_systemfile_unlock(hfsmp, lockflags); + while (error == 0) { + extent_info->overflow_count++; + extent_info->recStartBlock = key->startBlock; + extent_info->extents = extent_info->record.overflow; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extent_info->record.overflow[i].blockCount == 0) { + goto out; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, 
extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error); + goto out; + } + } + + /* Look for more overflow records */ + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, + extent_info->iterator, &(extent_info->btdata), + &(extent_info->recordlen)); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + break; + } + /* Stop when we encounter a different file or fork. */ + if ((key->fileID != fileID) || (key->forkType != forktype)) { + break; + } + } + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + +out: + /* If any blocks were relocated, account them and report progress */ + if (extent_info->blocks_relocated) { + hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; + hfs_truncatefs_progress(hfsmp); + if (fileID < kHFSFirstUserCatalogNodeID) { + printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n", + extent_info->blocks_relocated, fileID, hfsmp->vcbVN); + } + } + if (extent_info->iterator) { + hfs_free(extent_info->iterator, sizeof(*extent_info->iterator)); + } + if (release_desc == true) { + cat_releasedesc(extent_info->dirlink_desc); + } + if (extent_info->dirlink_desc) { + hfs_free(extent_info->dirlink_desc, sizeof(*extent_info->dirlink_desc)); + } + if (extent_info->dirlink_attr) { + hfs_free(extent_info->dirlink_attr, sizeof(*extent_info->dirlink_attr)); + } + if (extent_info->dirlink_fork) { + hfs_free(extent_info->dirlink_fork, sizeof(*extent_info->dirlink_fork)); + } + if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) { + hfs_update(vp, 0); + } + if (took_truncate_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + if (extent_info) { + hfs_free(extent_info, sizeof(*extent_info)); + } + if (hfs_resize_debug) { + printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error); + } + + return error; +} + + +/* + * This journal_relocate callback updates the journal info block to point + * at the new journal location. This write must NOT be done using the + * transaction. We must write the block immediately. We must also force + * it to get to the media so that the new journal location will be seen by + * the replay code before we can safely let journaled blocks be written + * to their normal locations. + * + * The tests for journal_uses_fua below are mildly hacky. Since the journal + * and the file system are both on the same device, I'm leveraging what + * the journal has decided about FUA. 
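+ * Concretely: if the journal device honors force-unit-access, the rewritten
+ * journal info block is marked FUA; otherwise the callback below falls back
+ * to an explicit cache flush after the write.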
+ */ +struct hfs_journal_relocate_args { + struct hfsmount *hfsmp; + vfs_context_t context; + u_int32_t newStartBlock; + u_int32_t newBlockCount; +}; + +static errno_t +hfs_journal_relocate_callback(void *_args) +{ + int error; + struct hfs_journal_relocate_args *args = _args; + struct hfsmount *hfsmp = args->hfsmp; + buf_t bp; + JournalInfoBlock *jibp; + + error = buf_meta_bread(hfsmp->hfs_devvp, + (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(args->context), &bp); + if (error) { + printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error); + if (bp) { + buf_brelse(bp); + } + return error; + } + jibp = (JournalInfoBlock*) buf_dataptr(bp); + jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize); + jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(bp); + error = buf_bwrite(bp); + if (error) { + printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error); + return error; + } + if (!journal_uses_fua(hfsmp->jnl)) { + error = hfs_flush(hfsmp, HFS_FLUSH_CACHE); + if (error) { + printf("hfs_journal_relocate_callback: hfs_flush failed (%d)\n", error); + error = 0; /* Don't fail the operation. */ + } + } + + return error; +} + + +/* Type of resize operation in progress */ +#define HFS_RESIZE_TRUNCATE 1 +#define HFS_RESIZE_EXTEND 2 + +/* + * Core function to relocate the journal file. This function takes the + * journal size of the newly relocated journal --- the caller can + * provide a new journal size if they want to change the size of + * the journal. The function takes care of updating the journal info + * block and all other data structures correctly. + * + * Note: This function starts a transaction and grabs the btree locks. + */ +static int +hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context) +{ + int error; + int journal_err; + int lockflags; + u_int32_t oldStartBlock; + u_int32_t newStartBlock; + u_int32_t oldBlockCount; + u_int32_t newBlockCount; + u_int32_t jnlBlockCount; + u_int32_t alloc_skipfreeblks; + struct cat_desc journal_desc; + struct cat_attr journal_attr; + struct cat_fork journal_fork; + struct hfs_journal_relocate_args callback_args; + + /* Calculate the number of allocation blocks required for the journal */ + jnlBlockCount = howmany(jnl_size, hfsmp->blockSize); + + /* + * During truncatefs(), the volume free block count is updated + * before relocating data and reflects the total number of free + * blocks that will exist on volume after the resize is successful. + * This means that the allocation blocks required for relocation + * have already been reserved and accounted for in the free block + * count. Therefore, block allocation and deallocation routines + * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS + * flag. + * + * This special handling is not required when the file system + * is being extended as we want all the allocated and deallocated + * blocks to be accounted for correctly. 
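+ * For example (illustrative numbers): while truncating, allocating 2048
+ * blocks for the relocated journal and later freeing the old journal's 2048
+ * blocks both leave the volume free-block count untouched, because that
+ * count already describes the post-resize volume.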
+ */ + if (resize_type == HFS_RESIZE_TRUNCATE) { + alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS; + } else { + alloc_skipfreeblks = 0; + } + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks, + &newStartBlock, &newBlockCount); + if (error) { + printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error); + goto fail; + } + if (newBlockCount != jnlBlockCount) { + printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount); + goto free_fail; + } + + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork); + if (error) { + printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error); + goto free_fail; + } + + oldStartBlock = journal_fork.cf_extents[0].startBlock; + oldBlockCount = journal_fork.cf_extents[0].blockCount; + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks); + if (error) { + printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + /* Update the catalog record for .journal */ + journal_fork.cf_size = hfs_blk_to_bytes(newBlockCount, hfsmp->blockSize); + journal_fork.cf_extents[0].startBlock = newStartBlock; + journal_fork.cf_extents[0].blockCount = newBlockCount; + journal_fork.cf_blocks = newBlockCount; + error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL); + cat_releasedesc(&journal_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_relocate_journal_file: cat_update returned %d\n", error); + goto free_fail; + } + + /* + * If the journal is part of the file system, then tell the journal + * code about the new location. If the journal is on an external + * device, then just keep using it as-is. + */ + if (hfsmp->jvp == hfsmp->hfs_devvp) { + callback_args.hfsmp = hfsmp; + callback_args.context = context; + callback_args.newStartBlock = newStartBlock; + callback_args.newBlockCount = newBlockCount; + + error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize, + (off_t)newBlockCount*hfsmp->blockSize, 0, + hfs_journal_relocate_callback, &callback_args); + if (error) { + /* NOTE: journal_relocate will mark the journal invalid. 
*/ + printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error); + goto fail; + } + if (hfs_resize_debug) { + printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); + } + hfsmp->jnl_start = newStartBlock; + hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error); + } + + return error; + +free_fail: + journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); + if (journal_err) { + printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error); + } + return error; +} + + +/* + * Relocate the journal file when the file system is being truncated. + * We do not down-size the journal when the file system size is + * reduced, so we always provide the current journal size to the + * relocate code. + */ +static int +hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + u_int32_t startBlock; + u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize; + + /* + * Figure out the location of the .journal file. When the journal + * is on an external device, we need to look up the .journal file. + */ + if (hfsmp->jvp == hfsmp->hfs_devvp) { + startBlock = hfsmp->jnl_start; + blockCount = hfsmp->jnl_size / hfsmp->blockSize; + } else { + u_int32_t fileid; + u_int32_t old_jnlfileid; + struct cat_attr attr; + struct cat_fork fork; + + /* + * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid + * is set, and it is trying to hide the .journal file. So temporarily + * unset the field while calling GetFileInfo. + */ + old_jnlfileid = hfsmp->hfs_jnlfileid; + hfsmp->hfs_jnlfileid = 0; + fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork); + hfsmp->hfs_jnlfileid = old_jnlfileid; + if (fileid != old_jnlfileid) { + printf("hfs_reclaim_journal_file: cannot find .journal file!\n"); + return EIO; + } + + startBlock = fork.cf_extents[0].startBlock; + blockCount = fork.cf_extents[0].blockCount; + } + + if (startBlock + blockCount <= allocLimit) { + /* The journal file does not require relocation */ + return 0; + } + + error = hfs_relocate_journal_file(hfsmp, hfs_blk_to_bytes(blockCount, hfsmp->blockSize), + HFS_RESIZE_TRUNCATE, context); + if (error == 0) { + hfsmp->hfs_resize_blocksmoved += blockCount; + hfs_truncatefs_progress(hfsmp); + printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", + blockCount, hfsmp->vcbVN); + } + + return error; +} + + +/* + * Move the journal info block to a new location. We have to make sure the + * new copy of the journal info block gets to the media first, then change + * the field in the volume header and the catalog record. 
+ */ +static int +hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error; + int journal_err; + int lockflags; + u_int32_t oldBlock; + u_int32_t newBlock; + u_int32_t blockCount; + struct cat_desc jib_desc; + struct cat_attr jib_attr; + struct cat_fork jib_fork; + buf_t old_bp, new_bp; + + if (hfsmp->vcbJinfoBlock <= allocLimit) { + /* The journal info block does not require relocation */ + return 0; + } + + error = hfs_start_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error); + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + error = BlockAllocate(hfsmp, 1, 1, 1, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN, + &newBlock, &blockCount); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); + goto fail; + } + if (blockCount != 1) { + printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); + goto free_fail; + } + + /* Copy the old journal info block content to the new location */ + error = buf_meta_bread(hfsmp->hfs_devvp, + (uint64_t)hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, vfs_context_ucred(context), &old_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error); + if (old_bp) { + buf_brelse(old_bp); + } + goto free_fail; + } + new_bp = buf_getblk(hfsmp->hfs_devvp, + (uint64_t)newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size), + hfsmp->blockSize, 0, 0, BLK_META); + bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize); + buf_brelse(old_bp); + if (journal_uses_fua(hfsmp->jnl)) + buf_markfua(new_bp); + error = buf_bwrite(new_bp); + if (error) { + printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error); + goto free_fail; + } + if (!journal_uses_fua(hfsmp->jnl)) { + error = hfs_flush(hfsmp, HFS_FLUSH_CACHE); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_flush failed (%d)\n", error); + /* Don't fail the operation. */ + } + } + + /* Deallocate the old block once the new one has the new valid content */ + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + goto free_fail; + } + + + /* Update the catalog record for .journal_info_block */ + error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork); + if (error) { + printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error); + goto fail; + } + oldBlock = jib_fork.cf_extents[0].startBlock; + jib_fork.cf_size = hfsmp->blockSize; + jib_fork.cf_extents[0].startBlock = newBlock; + jib_fork.cf_extents[0].blockCount = 1; + jib_fork.cf_blocks = 1; + error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL); + cat_releasedesc(&jib_desc); /* all done with cat descriptor */ + if (error) { + printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error); + goto fail; + } + + /* Update the pointer to the journal info block in the volume header. 
*/ + hfsmp->vcbJinfoBlock = newBlock; + error = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error); + goto fail; + } + hfs_systemfile_unlock(hfsmp, lockflags); + error = hfs_end_transaction(hfsmp); + if (error) { + printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); + } + error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); + if (error) { + printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error); + } + + /* Account for the block relocated and print progress */ + hfsmp->hfs_resize_blocksmoved += 1; + hfs_truncatefs_progress(hfsmp); + if (!error) { + printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n", + hfsmp->vcbVN); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount); + } + } + return error; + +free_fail: + journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); + if (journal_err) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED); + } + +fail: + hfs_systemfile_unlock(hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + if (hfs_resize_debug) { + printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error); + } + return error; +} + + +static u_int64_t +calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count) +{ + u_int64_t journal_size; + u_int32_t journal_scale; + +#define DEFAULT_JOURNAL_SIZE (8*1024*1024) +#define MAX_JOURNAL_SIZE (512*1024*1024) + + /* Calculate the journal size for this volume. We want + * at least 8 MB of journal for each 100 GB of disk space. + * We cap the size at 512 MB, unless the allocation block + * size is larger, in which case, we use one allocation + * block. + */ + journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024); + journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1); + if (journal_size > MAX_JOURNAL_SIZE) { + journal_size = MAX_JOURNAL_SIZE; + } + if (journal_size < hfsmp->blockSize) { + journal_size = hfsmp->blockSize; + } + return journal_size; +} + + +/* + * Calculate the expected journal size based on current partition size. + * If the size of the current journal is less than the calculated size, + * force journal relocation with the new journal size. 
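+ * For example (illustrative arithmetic): on a partition of roughly 1 TB
+ * (sector_size 512, sector_count 2 * 10^9), calculate_journal_size() above
+ * yields journal_scale 9, so the expected size is (9 + 1) * 8 MB = 80 MB,
+ * well under the 512 MB cap; a current journal smaller than that would be
+ * extended here.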
+ */ +static int +hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context) +{ + int error = 0; + u_int64_t calc_journal_size; + + if (hfsmp->jvp != hfsmp->hfs_devvp) { + if (hfs_resize_debug) { + printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n"); + } + return 0; + } + + calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count); + if (calc_journal_size <= hfsmp->jnl_size) { + /* The journal size requires no modification */ + goto out; + } + + if (hfs_resize_debug) { + printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size); + } + + /* Extend the journal to the new calculated size */ + error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context); + if (error == 0) { + printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n", + hfsmp->jnl_size, hfsmp->vcbVN); + } +out: + return error; +} + + +/* + * This function traverses through all extended attribute records for a given + * fileID, and calls function that reclaims data blocks that exist in the + * area of the disk being reclaimed which in turn is responsible for allocating + * new space, copying extent data, deallocating new space, and if required, + * splitting the extent. + * + * Note: The caller has already acquired the cnode lock on the file. Therefore + * we are assured that no other thread would be creating/deleting/modifying + * extended attributes for this file. + * + * Side Effects: + * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation + * blocks that were relocated. + * + * Returns: + * 0 on success, non-zero on failure. + */ +static int +hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + struct hfs_reclaim_extent_info *extent_info; + int i; + HFSPlusAttrKey *key; + int *lockflags; + + if (hfs_resize_debug) { + printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID); + } + + extent_info = hfs_mallocz(sizeof(struct hfs_reclaim_extent_info)); + extent_info->vp = vp; + extent_info->fileID = fileID; + extent_info->is_xattr = true; + extent_info->is_sysfile = vnode_issystem(vp); + extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp); + lockflags = &(extent_info->lockflags); + *lockflags = SFL_ATTRIBUTE | SFL_BITMAP; + + /* Initialize iterator from the extent_info structure */ + extent_info->iterator = hfs_mallocz(sizeof(struct BTreeIterator)); + + /* Build attribute key */ + key = (HFSPlusAttrKey *)&(extent_info->iterator->key); + error = hfs_buildattrkey(fileID, NULL, key); + if (error) { + goto out; + } + + /* Initialize btdata from extent_info structure. Note that the + * buffer pointer actually points to the xattr record from the + * extent_info structure itself. + */ + extent_info->btdata.bufferAddress = &(extent_info->record.xattr); + extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord); + extent_info->btdata.itemCount = 1; + + /* + * Sync all extent-based attribute data to the disk. + * + * All extent-based attribute data I/O is performed via cluster + * I/O using a virtual file that spans across entire file system + * space. 
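+ * That virtual file is hfsmp->hfs_attrdata_vp; pushing its dirty pages and
+ * waiting for in-flight writes below ensures the attribute data is stable
+ * on disk before its extents are relocated.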
+ */ + hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + (void)cluster_push(hfsmp->hfs_attrdata_vp, 0); + error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr"); + hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT); + if (error) { + goto out; + } + + /* Search for extended attribute for current file. This + * will place the iterator before the first matching record. + */ + *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); + error = BTSearchRecord(extent_info->fcb, extent_info->iterator, + &(extent_info->btdata), &(extent_info->recordlen), + extent_info->iterator); + hfs_systemfile_unlock(hfsmp, *lockflags); + if (error) { + if (error != btNotFound) { + goto out; + } + /* btNotFound is expected here, so just mask it */ + error = 0; + } + + while (1) { + /* Iterate to the next record */ + *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK); + error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, + extent_info->iterator, &(extent_info->btdata), + &(extent_info->recordlen)); + hfs_systemfile_unlock(hfsmp, *lockflags); + + /* Stop the iteration if we encounter end of btree or xattr with different fileID */ + if (error || key->fileID != fileID) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + + /* We only care about extent-based EAs */ + if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) && + (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) { + continue; + } + + if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) { + extent_info->overflow_count = 0; + extent_info->extents = extent_info->record.xattr.forkData.theFork.extents; + } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) { + extent_info->overflow_count++; + extent_info->extents = extent_info->record.xattr.overflowExtents.extents; + } + + extent_info->recStartBlock = key->startBlock; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (extent_info->extents[i].blockCount == 0) { + break; + } + extent_info->extent_index = i; + error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context); + if (error) { + printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error); + goto out; + } + } + } + +out: + /* If any blocks were relocated, account them and report progress */ + if (extent_info->blocks_relocated) { + hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated; + hfs_truncatefs_progress(hfsmp); + } + if (extent_info->iterator) { + hfs_free(extent_info->iterator, sizeof(*extent_info->iterator)); + } + if (extent_info) { + hfs_free(extent_info, sizeof(*extent_info)); + } + if (hfs_resize_debug) { + printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error); + } + return error; +} + +/* + * Reclaim any extent-based extended attributes allocation blocks from + * the area of the disk that is being truncated. + * + * The function traverses the attribute btree to find out the fileIDs + * of the extended attributes that need to be relocated. For every + * file whose large EA requires relocation, it looks up the cnode and + * calls hfs_reclaim_xattr() to do all the work for allocating + * new space, copying data, deallocating old space, and if required, + * splitting the extents. 
+ * + * Inputs: + * allocLimit - starting block of the area being reclaimed + * + * Returns: + * returns 0 on success, non-zero on failure. + */ +static int +hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error = 0; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusAttrKey *key; + HFSPlusAttrRecord rec; + int lockflags = 0; + cnid_t prev_fileid = 0; + struct vnode *vp; + int need_relocate; + int btree_operation; + u_int32_t files_moved = 0; + u_int32_t prev_blocksmoved; + int i; + + fcb = VTOF(hfsmp->hfs_attribute_vp); + /* Store the value to print total blocks moved by this function in end */ + prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; + + iterator = hfs_mallocz(sizeof(*iterator)); + key = (HFSPlusAttrKey *)&iterator->key; + btdata.bufferAddress = &rec; + btdata.itemSize = sizeof(rec); + btdata.itemCount = 1; + + need_relocate = false; + btree_operation = kBTreeFirstRecord; + /* Traverse the attribute btree to find extent-based EAs to reclaim */ + while (1) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + btree_operation = kBTreeNextRecord; + + /* If the extents of current fileID were already relocated, skip it */ + if (prev_fileid == key->fileID) { + continue; + } + + /* Check if any of the extents in the current record need to be relocated */ + need_relocate = false; + switch(rec.recordType) { + case kHFSPlusAttrForkData: + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (rec.forkData.theFork.extents[i].blockCount == 0) { + break; + } + if ((rec.forkData.theFork.extents[i].startBlock + + rec.forkData.theFork.extents[i].blockCount) > allocLimit) { + need_relocate = true; + break; + } + } + break; + + case kHFSPlusAttrExtents: + for (i = 0; i < kHFSPlusExtentDensity; i++) { + if (rec.overflowExtents.extents[i].blockCount == 0) { + break; + } + if ((rec.overflowExtents.extents[i].startBlock + + rec.overflowExtents.extents[i].blockCount) > allocLimit) { + need_relocate = true; + break; + } + } + break; + }; + + /* Continue iterating to next attribute record */ + if (need_relocate == false) { + continue; + } + + /* Look up the vnode for corresponding file. The cnode + * will be locked which will ensure that no one modifies + * the xattrs when we are relocating them. + * + * We want to allow open-unlinked files to be moved, + * so provide allow_deleted == 1 for hfs_vget(). + */ + if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) { + continue; + } + + error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + if (error) { + printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error); + break; + } + prev_fileid = key->fileID; + files_moved++; + } + + if (files_moved) { + printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n", + (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), + files_moved, hfsmp->vcbVN); + } + + hfs_free(iterator, sizeof(*iterator)); + return error; +} + +/* + * Reclaim blocks from regular files. + * + * This function iterates over all the record in catalog btree looking + * for files with extents that overlap into the space we're trying to + * free up. 
If a file extent requires relocation, it looks up the vnode + * and calls function to relocate the data. + * + * Returns: + * Zero on success, non-zero on failure. + */ +static int +hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) +{ + int error; + FCB *fcb; + struct BTreeIterator *iterator = NULL; + struct FSBufferDescriptor btdata; + int btree_operation; + int lockflags; + struct HFSPlusCatalogFile filerec; + struct vnode *vp; + struct vnode *rvp; + struct filefork *datafork; + u_int32_t files_moved = 0; + u_int32_t prev_blocksmoved; + +#if CONFIG_PROTECT + int keys_generated = 0; +#endif + + fcb = VTOF(hfsmp->hfs_catalog_vp); + /* Store the value to print total blocks moved by this function at the end */ + prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; + +#if CONFIG_PROTECT + /* + * For content-protected filesystems, we may need to relocate files that + * are encrypted. If they use the new-style offset-based IVs, then + * we can move them regardless of the lock state. We create a temporary + * key here that we use to read/write the data, then we discard it at the + * end of the function. + */ + if (cp_fs_protected (hfsmp->hfs_mp)) { + error = cpx_gentempkeys(&hfsmp->hfs_resize_cpx, hfsmp); + if (error == 0) { + keys_generated = 1; + } + + if (error) { + printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); + goto reclaim_filespace_done; + } + } + +#endif + + iterator = hfs_mallocz(sizeof(*iterator)); + + btdata.bufferAddress = &filerec; + btdata.itemSize = sizeof(filerec); + btdata.itemCount = 1; + + btree_operation = kBTreeFirstRecord; + while (1) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } + break; + } + btree_operation = kBTreeNextRecord; + + if (filerec.recordType != kHFSPlusFileRecord) { + continue; + } + + /* Check if any of the extents require relocation */ + bool overlaps; + error = hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec, &overlaps); + if (error) + break; + + if (!overlaps) + continue; + + /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */ + if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) { + if (hfs_resize_debug) { + printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID); + } + continue; + } + + /* If data fork exists or item is a directory hard link, relocate blocks */ + datafork = VTOF(vp); + if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) { + error = hfs_reclaim_file(hfsmp, vp, filerec.fileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + } + + /* If resource fork exists or item is a directory hard link, relocate blocks */ + if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) { + if (vnode_isdir(vp)) { + /* Resource fork vnode lookup is invalid for directory hard link. + * So we fake data fork vnode as resource fork vnode. 
+ */ + rvp = vp; + } else { + error = hfs_vgetrsrc(hfsmp, vp, &rvp); + if (error) { + printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT; + } + + error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID, + kHFSResourceForkType, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + } + + /* The file forks were relocated successfully, now drop the + * cnode lock and vnode reference, and continue iterating to + * next catalog record. + */ + hfs_unlock(VTOC(vp)); + vnode_put(vp); + files_moved++; + } + + if (files_moved) { + printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n", + (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved), + files_moved, hfsmp->vcbVN); + } + +#if CONFIG_PROTECT +reclaim_filespace_done: + + if (keys_generated) { + cpx_free(hfsmp->hfs_resize_cpx); + hfsmp->hfs_resize_cpx = NULL; + } +#endif + + hfs_free(iterator, sizeof(*iterator)); + + return error; +} + +/* + * Reclaim space at the end of a file system. + * + * Inputs - + * allocLimit - start block of the space being reclaimed + * reclaimblks - number of allocation blocks to reclaim + */ +static int +hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context) +{ + int error = 0; + + /* + * Preflight the bitmap to find out total number of blocks that need + * relocation. + * + * Note: Since allocLimit is set to the location of new alternate volume + * header, the check below does not account for blocks allocated for old + * alternate volume header. + */ + error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks)); + if (error) { + printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error); + return error; + } + if (hfs_resize_debug) { + printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks); + } + + /* Just to be safe, sync the content of the journal to the disk before we proceed */ + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + + /* First, relocate journal file blocks if they're in the way. + * Doing this first will make sure that journal relocate code + * gets access to contiguous blocks on disk first. The journal + * file has to be contiguous on the disk, otherwise resize will + * fail. + */ + error = hfs_reclaim_journal_file(hfsmp, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error); + return error; + } + + /* Relocate journal info block blocks if they're in the way. */ + error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error); + return error; + } + + /* Relocate extents of the Extents B-tree if they're in the way. + * Relocating extents btree before other btrees is important as + * this will provide access to largest contiguous block range on + * the disk for relocating extents btree. Note that extents btree + * can only have maximum of 8 extents. 
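+ * (Those eight extents are the kHFSPlusExtentDensity entries kept in the
+ * volume header's fork data for the extents file; the extents B-tree cannot
+ * spill into overflow records stored in itself.)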
+ */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Allocation file if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error); + return error; + } + + /* Relocate extents of the Catalog B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Attributes B-tree if they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error); + return error; + } + + /* Relocate extents of the Startup File if there is one and they're in the way. */ + error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID, + kHFSDataForkType, allocLimit, context); + if (error) { + printf("hfs_reclaimspace: reclaim startup file returned %d\n", error); + return error; + } + + /* + * We need to make sure the alternate volume header gets flushed if we moved + * any extents in the volume header. But we need to do that before + * shrinking the size of the volume, or else the journal code will panic + * with an invalid (too large) block number. + * + * Note that blks_moved will be set if ANY extent was moved, even + * if it was just an overflow extent. In this case, the journal_flush isn't + * strictly required, but shouldn't hurt. + */ + if (hfsmp->hfs_resize_blocksmoved) { + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + } + + /* Reclaim extents from catalog file records */ + error = hfs_reclaim_filespace(hfsmp, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error); + return error; + } + + /* Reclaim extents from extent-based extended attributes, if any */ + error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context); + if (error) { + printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error); + return error; + } + + /* + * Make sure reserved ranges in the region we're to allocate don't + * overlap. + */ + struct rl_entry *range; +again:; + int lockf = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_SHARED_LOCK); + TAILQ_FOREACH(range, &hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS], rl_link) { + if (rl_overlap(range, hfsmp->allocLimit, RL_INFINITY) != RL_NOOVERLAP) { + // Wait 100ms + hfs_systemfile_unlock(hfsmp, lockf); + msleep(hfs_reclaimspace, NULL, PINOD, "waiting on reserved blocks", + &(struct timespec){ 0, 100 * 1000000 }); + goto again; + } + } + hfs_systemfile_unlock(hfsmp, lockf); + + return error; +} + + +/* + * Check if there are any extents (including overflow extents) that overlap + * into the disk space that is being reclaimed. 
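+ * For example (illustrative numbers): with allocLimit = 1000000, an extent
+ * (startBlock 999990, blockCount 20) ends at block 1000010 > allocLimit and
+ * is therefore reported as overlapping.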
+ * + * Output - + * true - One of the extents need to be relocated + * false - No overflow extents need to be relocated, or there was an error + */ +static errno_t +hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, + struct HFSPlusCatalogFile *filerec, bool *overlaps) +{ + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusExtentRecord extrec; + HFSPlusExtentKey *extkeyptr; + FCB *fcb; + int i, j; + int error; + int lockflags = 0; + u_int32_t endblock; + errno_t ret = 0; + + /* Check if data fork overlaps the target space */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (filerec->dataFork.extents[i].blockCount == 0) { + break; + } + endblock = filerec->dataFork.extents[i].startBlock + + filerec->dataFork.extents[i].blockCount; + if (endblock > allocLimit) { + *overlaps = true; + goto out; + } + } + + /* Check if resource fork overlaps the target space */ + for (j = 0; j < kHFSPlusExtentDensity; ++j) { + if (filerec->resourceFork.extents[j].blockCount == 0) { + break; + } + endblock = filerec->resourceFork.extents[j].startBlock + + filerec->resourceFork.extents[j].blockCount; + if (endblock > allocLimit) { + *overlaps = true; + goto out; + } + } + + /* Return back if there are no overflow extents for this file */ + if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) { + *overlaps = false; + goto out; + } + + iterator = hfs_malloc(sizeof(*iterator)); + + bzero(iterator, sizeof(*iterator)); + extkeyptr = (HFSPlusExtentKey *)&iterator->key; + extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; + extkeyptr->forkType = 0; + extkeyptr->fileID = filerec->fileID; + extkeyptr->startBlock = 0; + + btdata.bufferAddress = &extrec; + btdata.itemSize = sizeof(extrec); + btdata.itemCount = 1; + + fcb = VTOF(hfsmp->hfs_extents_vp); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); + + /* This will position the iterator just before the first overflow + * extent record for given fileID. It will always return btNotFound, + * so we special case the error code. + */ + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + if (error && (error != btNotFound)) { + ret = MacToVFSError(error); + goto out; + } + + /* BTIterateRecord() might return error if the btree is empty, and + * therefore we return that the extent does not overflow to the caller + */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + while (error == 0) { + /* Stop when we encounter a different file. */ + if (extkeyptr->fileID != filerec->fileID) { + break; + } + /* Check if any of the forks exist in the target space. */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (extrec[i].blockCount == 0) { + break; + } + endblock = extrec[i].startBlock + extrec[i].blockCount; + if (endblock > allocLimit) { + *overlaps = true; + goto out; + } + } + /* Look for more records. */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + } + + if (error && error != btNotFound) { + ret = MacToVFSError(error); + goto out; + } + + *overlaps = false; + +out: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + + hfs_free(iterator, sizeof(*iterator)); + + return ret; +} + + +/* + * Calculate the progress of a file system resize operation. 
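+ * For example (illustrative numbers), 2500 of 10000 blocks moved is
+ * reported as 25 (percent).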
+ */ +int +hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) +{ + if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { + return (ENXIO); + } + + if (hfsmp->hfs_resize_totalblocks > 0) { + *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks); + } else { + *progress = 0; + } + + return (0); +} diff --git a/core/hfs_search.c b/core/hfs_search.c new file mode 100644 index 0000000..45aee7b --- /dev/null +++ b/core/hfs_search.c @@ -0,0 +1,1395 @@ +/* + * Copyright (c) 1997-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * + * @(#)hfs_search.c + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if CONFIG_MACF +#include +#endif + +#include "hfs.h" +#include "hfs_dbg.h" +#include "hfs_catalog.h" +#include "hfs_attrlist.h" +#include "hfs_endian.h" + +#include "FileMgrInternal.h" +#include "HFSUnicodeWrappers.h" +#include "BTreesPrivate.h" +#include "BTreeScanner.h" +#include "CatalogPrivate.h" + +#if CONFIG_SEARCHFS + +/* Search criterea. 
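+ *
+ * Each searchable attribute is described twice: searchparams1 is
+ * unpacked into the lower bound (searchInfo1) and searchparams2 into
+ * the upper bound (searchInfo2), and CheckCriteria() then performs an
+ * inclusive range test per attribute.  A hedged illustration (field
+ * names as defined below; the userspace attribute packing itself is
+ * not shown): to match files whose data fork is between 1 MiB and
+ * 10 MiB, set ATTR_FILE_DATALENGTH and
+ *
+ *	searchInfo1.f.dataLogicalLength = 1 * 1024 * 1024;	/* low  */
+ *	searchInfo2.f.dataLogicalLength = 10 * 1024 * 1024;	/* high */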
*/ +struct directoryInfoSpec +{ + u_int32_t numFiles; +}; + +struct fileInfoSpec +{ + off_t dataLogicalLength; + off_t dataPhysicalLength; + off_t resourceLogicalLength; + off_t resourcePhysicalLength; +}; + +struct searchinfospec +{ + u_char name[kHFSPlusMaxFileNameBytes]; + u_int32_t nameLength; + char attributes; // see IM:Files 2-100 + u_int32_t nodeID; + u_int32_t parentDirID; + struct timespec creationDate; + struct timespec modificationDate; + struct timespec changeDate; + struct timespec accessDate; + struct timespec lastBackupDate; + u_int8_t finderInfo[32]; + uid_t uid; + gid_t gid; + mode_t mask; + struct fileInfoSpec f; + struct directoryInfoSpec d; +}; +typedef struct searchinfospec searchinfospec_t; + +static void ResolveHardlink(struct hfsmount *hfsmp, HFSPlusCatalogFile *recp); + + +static int UnpackSearchAttributeBlock(struct hfsmount *hfsmp, struct attrlist *alist, + searchinfospec_t *searchInfo, void *attributeBuffer, int firstblock); + +static int CheckCriteria( ExtendedVCB *vcb, + u_long searchBits, + struct attrlist *attrList, + CatalogRecord *rec, + CatalogKey *key, + searchinfospec_t *searchInfo1, + searchinfospec_t *searchInfo2, + struct vfs_context *ctx); + +static int CheckAccess(ExtendedVCB *vcb, u_long searchBits, CatalogKey *key, struct vfs_context *ctx); + +static int InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, + CatalogKey *key, struct attrlist *returnAttrList, + void *attributesBuffer, void *variableBuffer, + uint32_t * nummatches ); + +static Boolean CompareRange(u_long val, u_long low, u_long high); +static Boolean CompareWideRange(u_int64_t val, u_int64_t low, u_int64_t high); + +static Boolean CompareRange( u_long val, u_long low, u_long high ) +{ + return( (val >= low) && (val <= high) ); +} + +static Boolean CompareWideRange( u_int64_t val, u_int64_t low, u_int64_t high ) +{ + return( (val >= low) && (val <= high) ); +} +//#define CompareRange(val, low, high) ((val >= low) && (val <= high)) + + +/************************************************************************/ +/* Entry for searchfs() */ +/************************************************************************/ + +#define errSearchBufferFull 101 /* Internal search errors */ +/* +# +#% searchfs vp L L L +# +vnop_searchfs { + IN struct vnode *vp; + IN off_t length; + IN int flags; + IN kauth_cred_t cred; + IN struct proc *p; +}; +*/ + +int +hfs_vnop_search(ap) + struct vnop_searchfs_args *ap; /* + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + void *a_searchparams1; + void *a_searchparams2; + struct attrlist *a_searchattrs; + u_long a_maxmatches; + struct timeval *a_timelimit; + struct attrlist *a_returnattrs; + u_long *a_nummatches; + u_long a_scriptcode; + u_long a_options; + struct uio *a_uio; + struct searchstate *a_searchstate; + vfs_context_t a_context; + */ +{ + ExtendedVCB *vcb = VTOVCB(ap->a_vp); + struct hfsmount *hfsmp; + FCB * catalogFCB; + searchinfospec_t searchInfo1; + searchinfospec_t searchInfo2; + void *attributesBuffer = NULL; + void *variableBuffer; + u_int32_t fixedBlockSize; + u_int32_t eachReturnBufferSize; + struct proc *p = current_proc(); + int err = E_NONE; + int isHFSPlus; + CatalogKey * myCurrentKeyPtr; + CatalogRecord * myCurrentDataPtr; + CatPosition * myCatPositionPtr; + BTScanState myBTScanState; + user_addr_t user_start = 0; + user_size_t user_len = 0; + int32_t searchTime; + int lockflags; + boolean_t timerExpired = FALSE; + + /* XXX Parameter check a_searchattrs? 
*/ + + *(ap->a_nummatches) = 0; + + if (ap->a_options & ~SRCHFS_VALIDOPTIONSMASK) { + return (EINVAL); + } + + /* + * Fail requests for attributes that HFS does not support for the + * items that match the search criteria. Note that these checks + * are for the OUTBOUND attributes to be returned (not search criteria). + */ + if ((ap->a_returnattrs->commonattr & ~HFS_ATTR_CMN_VALID) || + (ap->a_returnattrs->volattr != 0) || + (ap->a_returnattrs->dirattr & ~HFS_ATTR_DIR_VALID) || + (ap->a_returnattrs->fileattr & ~HFS_ATTR_FILE_VALID) || + (ap->a_returnattrs->forkattr != 0)) { + + return (EINVAL); + } + + /* SRCHFS_SKIPLINKS requires root access. + * This option cannot be used with either + * the ATTR_CMN_NAME or ATTR_CMN_PAROBJID + * attributes. + */ + if (ap->a_options & SRCHFS_SKIPLINKS) { + attrgroup_t attrs; + + attrs = ap->a_searchattrs->commonattr | ap->a_returnattrs->commonattr; + if (attrs & (ATTR_CMN_NAME | ATTR_CMN_PAROBJID)) { + return (EINVAL); + } + + if ((err = vfs_context_suser(ap->a_context))) { + return (err); + } + } + + // If both 32-bit and 64-bit parent ids or file ids are given + // then return an error. + + attrgroup_t test_attrs=ap->a_searchattrs->commonattr; + + if (((test_attrs & ATTR_CMN_OBJID) && (test_attrs & ATTR_CMN_FILEID)) || + ((test_attrs & ATTR_CMN_PARENTID) && (test_attrs & ATTR_CMN_PAROBJID))) { + return (EINVAL); + } + + if (uio_resid(ap->a_uio) <= 0) { + return (EINVAL); + } + + isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); + hfsmp = VTOHFS(ap->a_vp); + + searchTime = kMaxMicroSecsInKernel; + if (ap->a_timelimit->tv_sec == 0 && + ap->a_timelimit->tv_usec > 0 && + ap->a_timelimit->tv_usec < kMaxMicroSecsInKernel) { + searchTime = ap->a_timelimit->tv_usec; + } + + /* UnPack the search boundries, searchInfo1, searchInfo2 */ + err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, + &searchInfo1, ap->a_searchparams1, 1); + if (err) { + return err; + } + err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, + &searchInfo2, ap->a_searchparams2, 0); + if (err) { + return err; + } + //shadow search bits if 64-bit file/parent ids are used + if (ap->a_searchattrs->commonattr & ATTR_CMN_FILEID) + ap->a_searchattrs->commonattr |= ATTR_CMN_OBJID; + if (ap->a_searchattrs->commonattr & ATTR_CMN_PARENTID) + ap->a_searchattrs->commonattr |= ATTR_CMN_PAROBJID; + + fixedBlockSize = sizeof(u_int32_t) + hfs_attrblksize(ap->a_returnattrs); /* u_int32_t for length word */ + + eachReturnBufferSize = fixedBlockSize; + + if ( ap->a_returnattrs->commonattr & ATTR_CMN_NAME ) /* XXX should be more robust! */ + eachReturnBufferSize += kHFSPlusMaxFileNameBytes + 1; + + attributesBuffer = hfs_mallocz(eachReturnBufferSize); + variableBuffer = (void*)((char*) attributesBuffer + fixedBlockSize); + + // XXXdbg - have to lock the user's buffer so we don't fault + // while holding the shared catalog file lock. see the comment + // in hfs_readdir() for more details. + // + if (hfsmp->jnl && uio_isuserspace(ap->a_uio)) { + user_start = uio_curriovbase(ap->a_uio); + user_len = uio_curriovlen(ap->a_uio); + + if ((err = vslock(user_start, user_len)) != 0) { + user_start = 0; + goto ExitThisRoutine; + } + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + catalogFCB = GetFileControlBlock(vcb->catalogRefNum); + myCurrentKeyPtr = NULL; + myCurrentDataPtr = NULL; + myCatPositionPtr = (CatPosition *)ap->a_searchstate; + + if (ap->a_options & SRCHFS_START) { + /* Starting a new search. 
*/ + /* Make sure the on-disk Catalog file is current */ + (void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p); + if (hfsmp->jnl) { + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + } + + ap->a_options &= ~SRCHFS_START; + bzero((caddr_t)myCatPositionPtr, sizeof(*myCatPositionPtr)); + err = BTScanInitialize(catalogFCB, 0, 0, 0, kCatSearchBufferSize, &myBTScanState); + if (err) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto ExitThisRoutine; + } + } else { + /* Resuming a search. */ + err = BTScanInitialize(catalogFCB, myCatPositionPtr->nextNode, + myCatPositionPtr->nextRecord, + myCatPositionPtr->recordsFound, + kCatSearchBufferSize, + &myBTScanState); + /* Make sure Catalog hasn't changed. */ + if (err == 0 + && myCatPositionPtr->writeCount != myBTScanState.btcb->writeCount) { + myCatPositionPtr->writeCount = myBTScanState.btcb->writeCount; + err = EBUSY; /* catChangedErr */ + } + } + hfs_systemfile_unlock(hfsmp, lockflags); + + if (err) + goto ExitThisRoutine; + + /* + * Check all the catalog btree records... + * return the attributes for matching items + */ + for (;;) { + struct timeval myCurrentTime; + struct timeval myElapsedTime; + + err = BTScanNextRecord(&myBTScanState, timerExpired, + (void **)&myCurrentKeyPtr, (void **)&myCurrentDataPtr, + NULL); + if (err) + break; + + /* Resolve any hardlinks */ + if (isHFSPlus && (ap->a_options & SRCHFS_SKIPLINKS) == 0) { + ResolveHardlink(vcb, (HFSPlusCatalogFile *)myCurrentDataPtr); + } + if (CheckCriteria( vcb, ap->a_options, ap->a_searchattrs, myCurrentDataPtr, + myCurrentKeyPtr, &searchInfo1, &searchInfo2, ap->a_context ) + && CheckAccess(vcb, ap->a_options, myCurrentKeyPtr, ap->a_context)) { + err = InsertMatch(hfsmp, ap->a_uio, myCurrentDataPtr, + myCurrentKeyPtr, ap->a_returnattrs, + attributesBuffer, variableBuffer, ap->a_nummatches); + if (err) { + /* + * The last match didn't fit so come back + * to this record on the next trip. + */ + --myBTScanState.recordsFound; + --myBTScanState.recordNum; + break; + } + + if (*(ap->a_nummatches) >= ap->a_maxmatches) + break; + } + if (timerExpired == FALSE) { + /* + * Check our elapsed time and bail if we've hit the max. + * The idea here is to throttle the amount of time we + * spend in the kernel. 
+ */ + microuptime(&myCurrentTime); + timersub(&myCurrentTime, &myBTScanState.startTime, &myElapsedTime); + /* + * Note: assumes kMaxMicroSecsInKernel is less than 1,000,000 + */ + if (myElapsedTime.tv_sec > 0 + || myElapsedTime.tv_usec >= searchTime) { + timerExpired = TRUE; + } else if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) + timerExpired = TRUE; + } + } + + /* Update catalog position */ + myCatPositionPtr->writeCount = myBTScanState.btcb->writeCount; + + BTScanTerminate(&myBTScanState, &myCatPositionPtr->nextNode, + &myCatPositionPtr->nextRecord, + &myCatPositionPtr->recordsFound); + + if ( err == E_NONE ) { + err = EAGAIN; /* signal to the user to call searchfs again */ + } else if ( err == errSearchBufferFull ) { + if ( *(ap->a_nummatches) > 0 ) + err = EAGAIN; + else + err = ENOBUFS; + } else if ( err == btNotFound ) { + err = E_NONE; /* the entire disk has been searched */ + } else if ( err == fsBTTimeOutErr ) { + err = EAGAIN; + } + +ExitThisRoutine: + if (attributesBuffer) + hfs_free(attributesBuffer, eachReturnBufferSize); + + if (user_start) { + vsunlock(user_start, user_len, TRUE); + } + + return (MacToVFSError(err)); +} + + +static void +ResolveHardlink(struct hfsmount *hfsmp, HFSPlusCatalogFile *recp) +{ + u_int32_t type, creator; + int isdirlink = 0; + int isfilelink = 0; + time_t filecreatedate; + + if (recp->recordType != kHFSPlusFileRecord) { + return; + } + type = SWAP_BE32(recp->userInfo.fdType); + creator = SWAP_BE32(recp->userInfo.fdCreator); + filecreatedate = to_bsd_time(recp->createDate); + + if ((type == kHardLinkFileType && creator == kHFSPlusCreator) && + (filecreatedate == (time_t)hfsmp->hfs_itime || + filecreatedate == (time_t)hfsmp->hfs_metadata_createdate)) { + isfilelink = 1; + } else if ((type == kHFSAliasType && creator == kHFSAliasCreator) && + (recp->flags & kHFSHasLinkChainMask) && + (filecreatedate == (time_t)hfsmp->hfs_itime || + filecreatedate == (time_t)hfsmp->hfs_metadata_createdate)) { + isdirlink = 1; + } + + if (isfilelink || isdirlink) { + cnid_t saved_cnid; + int lockflags; + + /* Export link's cnid (a unique value) instead of inode's cnid */ + saved_cnid = recp->fileID; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + (void) cat_resolvelink(hfsmp, recp->hl_linkReference, isdirlink, recp); + + recp->fileID = saved_cnid; + hfs_systemfile_unlock(hfsmp, lockflags); + } +} + + +static Boolean +CompareMasked(const u_int32_t *thisValue, const u_int32_t *compareData, + const u_int32_t *compareMask, u_int32_t count) +{ + Boolean matched; + u_int32_t i; + + matched = true; /* Assume it will all match */ + + for (i=0; i= f_len) { + *tsp = f_len; + + if (FastRelString(tsp++, find) == 0) + return TRUE; + } + + return FALSE; +} +#endif + + +/* + * Check to see if caller has access rights to this item + */ + +static int +CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, struct vfs_context *ctx) +{ + Boolean isHFSPlus; + int myErr; + int myResult; + HFSCatalogNodeID myNodeID; + hfsmount_t * hfsmp; + struct FndrDirInfo *finfop; + struct vnode * vp = NULL; + + myResult = 0; /* default to "no access" */ + + if (!vfs_context_suser(ctx)) { + myResult = 1; /* allow access */ + goto ExitThisRoutine; /* root always has access */ + } + + hfsmp = VCBTOHFS( theVCBPtr ); + isHFSPlus = ( theVCBPtr->vcbSigWord == kHFSPlusSigWord ); + if ( isHFSPlus ) + myNodeID = theKeyPtr->hfsPlus.parentID; +#if CONFIG_HFS_STD + else + myNodeID = theKeyPtr->hfs.parentID; +#endif + + while ( myNodeID >= kRootDirID ) { + 
cnode_t * cp; + + /* now go get catalog data for this directory */ + myErr = hfs_vget(hfsmp, myNodeID, &vp, 0, 0); + if ( myErr ) { + goto ExitThisRoutine; /* no access */ + } + + cp = VTOC(vp); + finfop = (struct FndrDirInfo *)&cp->c_attr.ca_finderinfo[0]; + + if ( searchBits & SRCHFS_SKIPPACKAGES ) { + if ( (SWAP_BE16(finfop->frFlags) & kHasBundle) + || (cp->c_desc.cd_nameptr != NULL + && is_package_name((const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen)) ) { + myResult = 0; + goto ExitThisRoutine; + } + } + + if ( searchBits & SRCHFS_SKIPINAPPROPRIATE ) { + if ( cp->c_parentcnid == kRootDirID && cp->c_desc.cd_nameptr != NULL && + vn_searchfs_inappropriate_name((const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) ) { + myResult = 0; + goto ExitThisRoutine; + } + } + + if ( (searchBits & SRCHFS_SKIPINVISIBLE) && + (SWAP_BE16(finfop->frFlags) & kIsInvisible) ) { + myResult = 0; + goto ExitThisRoutine; + } + + myNodeID = cp->c_parentcnid; /* move up the hierarchy */ + hfs_unlock(VTOC(vp)); + +#if CONFIG_MACF + if (vp->v_type == VDIR) { + myErr = mac_vnode_check_readdir(ctx, vp); + } else { + myErr = mac_vnode_check_stat(ctx, NOCRED, vp); + } + if (myErr) { + vnode_put(vp); + vp = NULL; + goto ExitThisRoutine; + } +#endif /* MAC */ + + if (vnode_vtype(vp) == VDIR) { + myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), ctx); + } else { + myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH), ctx); + } + vnode_put(vp); + vp = NULL; + if ( myErr ) { + goto ExitThisRoutine; /* no access */ + } + } + myResult = 1; /* allow access */ + +ExitThisRoutine: + if ( vp != NULL ) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + } + return ( myResult ); + +} + +static int +CheckCriteria( ExtendedVCB *vcb, + u_long searchBits, + struct attrlist *attrList, + CatalogRecord *rec, + CatalogKey *key, + searchinfospec_t *searchInfo1, + searchinfospec_t *searchInfo2, + struct vfs_context *ctx) +{ + Boolean matched, atleastone; + Boolean isHFSPlus; + attrgroup_t searchAttributes; + struct cat_attr c_attr; + struct cat_fork datafork; + struct cat_fork rsrcfork; + int force_case_sensitivity = proc_is_forcing_hfs_case_sensitivity(vfs_context_proc(ctx)); + + bzero(&c_attr, sizeof(c_attr)); + isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); + + switch (rec->recordType) { + +#if CONFIG_HFS_STD + case kHFSFolderRecord: + if ( (searchBits & SRCHFS_MATCHDIRS) == 0 ) { /* If we are NOT searching folders */ + matched = false; + goto TestDone; + } + break; + + case kHFSFileRecord: + if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ + matched = false; + goto TestDone; + } + break; +#endif + + case kHFSPlusFolderRecord: + if ( (searchBits & SRCHFS_MATCHDIRS) == 0 ) { /* If we are NOT searching folders */ + matched = false; + goto TestDone; + } + break; + + case kHFSPlusFileRecord: + /* Check if hardlink links should be skipped. 
*/ + if (searchBits & SRCHFS_SKIPLINKS) { + cnid_t parid = key->hfsPlus.parentID; + HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)rec; + + if ((SWAP_BE32(filep->userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(filep->userInfo.fdCreator) == kHFSPlusCreator)) { + return (false); /* skip over file link records */ + } else if ((parid == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (filep->bsdInfo.special.linkCount == 0)) { + return (false); /* skip over unlinked files */ + } else if ((SWAP_BE32(filep->userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(filep->userInfo.fdCreator) == kHFSAliasCreator) && + (filep->flags & kHFSHasLinkChainMask)) { + return (false); /* skip over dir link records */ + } + } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + return (false); /* skip over private files */ + } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + return (false); /* skip over private files */ + } + + if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ + matched = false; + goto TestDone; + } + break; + + default: /* Never match a thread record or any other type. */ + return( false ); /* Not a file or folder record, so can't search it */ + } + + matched = true; /* Assume we got a match */ + atleastone = false; /* Dont insert unless we match at least one criteria */ + + /* First, attempt to match the name -- either partial or complete */ + if ( attrList->commonattr & ATTR_CMN_NAME ) { + if (isHFSPlus) { + int case_sensitive = 0; + + /* + * Longstanding default behavior here is to use a non-case-sensitive + * search, even on case-sensitive filesystems. + * + * We only force case sensitivity if the controlling process has explicitly + * asked for it in the proc flags, and only if they are not doing + * a partial name match. Consider that if you are doing a partial + * name match ("all files that begin with 'image'"), the likelihood is + * high that you would want to see all matches, even those that do not + * explicitly match the case. + */ + if (force_case_sensitivity) { + case_sensitive = 1; + } + + /* Check for partial/full HFS Plus name match */ + + if ( searchBits & SRCHFS_MATCHPARTIALNAMES ) { + /* always use a case-INSENSITIVE search here */ + matched = ComparePartialUnicodeName(key->hfsPlus.nodeName.unicode, + key->hfsPlus.nodeName.length, + (UniChar*)searchInfo1->name, + searchInfo1->nameLength, 0); + } + else { + /* Full name match. Are we HFSX (case sensitive) or HFS+ ? */ + if (case_sensitive) { + matched = (UnicodeBinaryCompare(key->hfsPlus.nodeName.unicode, + key->hfsPlus.nodeName.length, + (UniChar*)searchInfo1->name, + searchInfo1->nameLength ) == 0); + } + else { + matched = (FastUnicodeCompare(key->hfsPlus.nodeName.unicode, + key->hfsPlus.nodeName.length, + (UniChar*)searchInfo1->name, + searchInfo1->nameLength ) == 0); + } + } + } +#if CONFIG_HFS_STD + else { + /* Check for partial/full HFS name match */ + + if ( searchBits & SRCHFS_MATCHPARTIALNAMES ) + matched = ComparePartialPascalName(key->hfs.nodeName, (u_char*)searchInfo1->name); + else /* full HFS name match */ + matched = (FastRelString(key->hfs.nodeName, (u_char*)searchInfo1->name) == 0); + } +#endif + + if ( matched == false || (searchBits & ~SRCHFS_MATCHPARTIALNAMES) == 0 ) + goto TestDone; /* no match, or nothing more to compare */ + + atleastone = true; + } + + /* Convert catalog record into cat_attr format. 
*/ + cat_convertattr(VCBTOHFS(vcb), rec, &c_attr, &datafork, &rsrcfork); + + if (searchBits & SRCHFS_SKIPINVISIBLE) { + int flags; + + switch (rec->recordType) { +#if CONFIG_HFS_STD + case kHFSFolderRecord: + { + struct FndrDirInfo *finder_info; + + finder_info = (struct FndrDirInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->frFlags); + break; + } + + case kHFSFileRecord: + { + struct FndrFileInfo *finder_info; + + finder_info = (struct FndrFileInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->fdFlags); + break; + } +#endif + + case kHFSPlusFolderRecord: + { + struct FndrDirInfo *finder_info; + + finder_info = (struct FndrDirInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->frFlags); + break; + } + + case kHFSPlusFileRecord: + { + struct FndrFileInfo *finder_info; + + finder_info = (struct FndrFileInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->fdFlags); + break; + } + + default: + { + flags = kIsInvisible; + break; + } + } + + if (flags & kIsInvisible) { + matched = false; + goto TestDone; + } + } + + + + /* Now that we have a record worth searching, see if it matches the search attributes */ +#if CONFIG_HFS_STD + if (rec->recordType == kHFSFileRecord || + rec->recordType == kHFSPlusFileRecord) { +#else + if (rec->recordType == kHFSPlusFileRecord) { +#endif + + if ((attrList->fileattr & ~ATTR_FILE_VALIDMASK) != 0) { /* attr we do know about */ + matched = false; + goto TestDone; + } + else if ((attrList->fileattr & ATTR_FILE_VALIDMASK) != 0) { + searchAttributes = attrList->fileattr; + +#if HFS_COMPRESSION + if ( c_attr.ca_flags & UF_COMPRESSED ) { + /* for compressed files, set the data length to the uncompressed data size */ + if (( searchAttributes & ATTR_FILE_DATALENGTH ) || + ( searchAttributes & ATTR_FILE_DATAALLOCSIZE ) ) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(vcb, NULL, c_attr.ca_fileid, &datafork.cf_size, 1) ) { /* 1 == don't take the cnode lock */ + datafork.cf_blocks = rsrcfork.cf_blocks; + } + } + /* treat compressed files as if their resource fork is empty */ + if (( searchAttributes & ATTR_FILE_RSRCLENGTH ) || + ( searchAttributes & ATTR_FILE_RSRCALLOCSIZE ) ) { + rsrcfork.cf_size = 0; + rsrcfork.cf_blocks = 0; + } + } +#endif /* HFS_COMPRESSION */ + + /* File logical length (data fork) */ + if ( searchAttributes & ATTR_FILE_DATALENGTH ) { + matched = CompareWideRange( + datafork.cf_size, + searchInfo1->f.dataLogicalLength, + searchInfo2->f.dataLogicalLength); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* File physical length (data fork) */ + if ( searchAttributes & ATTR_FILE_DATAALLOCSIZE ) { + matched = CompareWideRange( + (u_int64_t)datafork.cf_blocks * (u_int64_t)vcb->blockSize, + searchInfo1->f.dataPhysicalLength, + searchInfo2->f.dataPhysicalLength); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* File logical length (resource fork) */ + if ( searchAttributes & ATTR_FILE_RSRCLENGTH ) { + matched = CompareWideRange( + rsrcfork.cf_size, + searchInfo1->f.resourceLogicalLength, + searchInfo2->f.resourceLogicalLength); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* File physical length (resource fork) */ + if ( searchAttributes & ATTR_FILE_RSRCALLOCSIZE ) { + matched = CompareWideRange( + (u_int64_t)rsrcfork.cf_blocks * (u_int64_t)vcb->blockSize, + searchInfo1->f.resourcePhysicalLength, + searchInfo2->f.resourcePhysicalLength); + if (matched == false) goto TestDone; + atleastone = true; + } + } + else { + atleastone = 
true; /* to match SRCHFS_MATCHFILES */ + } + } + /* + * Check the directory attributes + */ +#if CONFIG_HFS_STD + else if (rec->recordType == kHFSFolderRecord || + rec->recordType == kHFSPlusFolderRecord) { +#else + else if (rec->recordType == kHFSPlusFolderRecord) { +#endif + if ((attrList->dirattr & ~ATTR_DIR_VALIDMASK) != 0) { /* attr we do know about */ + matched = false; + goto TestDone; + } + else if ((attrList->dirattr & ATTR_DIR_VALIDMASK) != 0) { + searchAttributes = attrList->dirattr; + + /* Directory valence */ + if ( searchAttributes & ATTR_DIR_ENTRYCOUNT ) { + matched = CompareRange(c_attr.ca_entries, + searchInfo1->d.numFiles, + searchInfo2->d.numFiles ); + if (matched == false) goto TestDone; + atleastone = true; + } + } + else { + atleastone = true; /* to match SRCHFS_MATCHDIRS */ + } + } + + /* + * Check the common attributes + */ + searchAttributes = attrList->commonattr; + if ( (searchAttributes & ATTR_CMN_VALIDMASK) != 0 ) { + /* node ID */ + if ( searchAttributes & ATTR_CMN_OBJID ) { + matched = CompareRange(c_attr.ca_fileid, + searchInfo1->nodeID, + searchInfo2->nodeID ); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Parent ID */ + if ( searchAttributes & ATTR_CMN_PAROBJID ) { + HFSCatalogNodeID parentID; + + if (isHFSPlus) + parentID = key->hfsPlus.parentID; +#if CONFIG_HFS_STD + else + parentID = key->hfs.parentID; +#endif + + matched = CompareRange(parentID, searchInfo1->parentDirID, + searchInfo2->parentDirID ); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Finder Info & Extended Finder Info where extFinderInfo is last 32 bytes */ + if ( searchAttributes & ATTR_CMN_FNDRINFO ) { + u_int32_t *thisValue; + thisValue = (u_int32_t *) &c_attr.ca_finderinfo; + + /* + * Note: ioFlFndrInfo and ioDrUsrWds have the same offset in search info, so + * no need to test the object type here. 
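+ *
+ * In this call searchInfo1->finderInfo supplies the wanted bytes and
+ * searchInfo2->finderInfo supplies a bit mask, compared as eight
+ * 32-bit words; a zero mask word means "don't care" for that word.
+ * A hedged illustration (byte order and the exact packing of the
+ * 32-byte Finder info are not shown): to match only items whose
+ * first Finder-info word equals 'TEXT', userspace would set the
+ * first four bytes of params1's finderInfo to 'T','E','X','T', the
+ * first mask word in params2 to 0xFFFFFFFF, and every other mask
+ * word to 0.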
+ */ + matched = CompareMasked(thisValue, + (u_int32_t *)&searchInfo1->finderInfo, + (u_int32_t *) &searchInfo2->finderInfo, 8); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Create date */ + if ( searchAttributes & ATTR_CMN_CRTIME ) { + matched = CompareRange(c_attr.ca_itime, + searchInfo1->creationDate.tv_sec, + searchInfo2->creationDate.tv_sec); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Mod date */ + if ( searchAttributes & ATTR_CMN_MODTIME ) { + matched = CompareRange(c_attr.ca_mtime, + searchInfo1->modificationDate.tv_sec, + searchInfo2->modificationDate.tv_sec); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Change Time */ + if ( searchAttributes & ATTR_CMN_CHGTIME ) { + matched = CompareRange(c_attr.ca_ctime, + searchInfo1->changeDate.tv_sec, + searchInfo2->changeDate.tv_sec); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Access date */ + if ( searchAttributes & ATTR_CMN_ACCTIME ) { + matched = CompareRange(c_attr.ca_atime, + searchInfo1->accessDate.tv_sec, + searchInfo2->accessDate.tv_sec); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Backup date */ + if ( searchAttributes & ATTR_CMN_BKUPTIME ) { + matched = CompareRange(c_attr.ca_btime, + searchInfo1->lastBackupDate.tv_sec, + searchInfo2->lastBackupDate.tv_sec); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* User ID */ + if ( searchAttributes & ATTR_CMN_OWNERID ) { + matched = CompareRange(c_attr.ca_uid, + searchInfo1->uid, searchInfo2->uid); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* Group ID */ + if ( searchAttributes & ATTR_CMN_GRPID ) { + matched = CompareRange(c_attr.ca_gid, + searchInfo1->gid, searchInfo2->gid); + if (matched == false) goto TestDone; + atleastone = true; + } + + /* mode */ + if ( searchAttributes & ATTR_CMN_ACCESSMASK ) { + matched = CompareRange((u_int32_t)c_attr.ca_mode, + (u_int32_t)searchInfo1->mask, + (u_int32_t)searchInfo2->mask); + if (matched == false) goto TestDone; + atleastone = true; + } + } + + /* If we got here w/o matching any, then set to false */ + if (! atleastone) + matched = false; + +TestDone: + /* + * Finally, determine whether we need to negate the sense of the match + * (i.e. find all objects that DON'T match). + */ + if ( searchBits & SRCHFS_NEGATEPARAMS ) + matched = !matched; + + return( matched ); +} + + +/* + * Adds another record to the packed array for output + */ +static int +InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, + CatalogKey *key, struct attrlist *returnAttrList, + void *attributesBuffer, void *variableBuffer, uint32_t * nummatches) +{ + int err; + void *rovingAttributesBuffer; + void *rovingVariableBuffer; + long packedBufferSize; + struct attrblock attrblk; + struct cat_desc c_desc; + struct cat_attr c_attr; + struct cat_fork datafork; + struct cat_fork rsrcfork; + + bzero(&c_desc, sizeof(c_desc)); + bzero(&c_attr, sizeof(c_attr)); + rovingAttributesBuffer = (char*)attributesBuffer + sizeof(u_int32_t); /* Reserve space for length field */ + rovingVariableBuffer = variableBuffer; + + /* Convert catalog record into cat_attr format. 
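+ *
+ * Each match is copied out as one self-describing entry assembled
+ * below; the layout is
+ *
+ *	+------------------+-----------------------+------------------+
+ *	| u_int32_t length | fixed-size attributes | variable-length  |
+ *	| (whole entry)    | (hfs_attrblksize())   | data, e.g. name  |
+ *	+------------------+-----------------------+------------------+
+ *
+ * so a reader walks the output buffer by reading each leading length
+ * word and skipping forward that many bytes.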
*/ + cat_convertattr(hfsmp, rec, &c_attr, &datafork, &rsrcfork); + + /* Hide our private meta data directories */ + if (c_attr.ca_fileid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + c_attr.ca_fileid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + err = 0; + goto exit; + } + + /* Hide the private journal files */ + if (hfsmp->jnl && + ((c_attr.ca_fileid == hfsmp->hfs_jnlfileid) || + (c_attr.ca_fileid == hfsmp->hfs_jnlinfoblkid))) { + err = 0; + goto exit; + } + + if (returnAttrList->commonattr & ATTR_CMN_NAME) { + err = cat_convertkey(hfsmp, key, rec, &c_desc); + if (err) { + /* This means that we probably had a CNID error */ + goto exit; + } + } else { + c_desc.cd_cnid = c_attr.ca_fileid; + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) + c_desc.cd_parentcnid = key->hfsPlus.parentID; +#if CONFIG_HFS_STD + else + c_desc.cd_parentcnid = key->hfs.parentID; +#endif + + } + + attrblk.ab_attrlist = returnAttrList; + attrblk.ab_attrbufpp = &rovingAttributesBuffer; + attrblk.ab_varbufpp = &rovingVariableBuffer; + attrblk.ab_flags = 0; + attrblk.ab_blocksize = 0; + attrblk.ab_context = vfs_context_current(); + + hfs_packattrblk(&attrblk, hfsmp, NULL, &c_desc, &c_attr, &datafork, &rsrcfork, vfs_context_current()); + + packedBufferSize = (char*)rovingVariableBuffer - (char*)attributesBuffer; + + if ( packedBufferSize > uio_resid(a_uio) ) + return( errSearchBufferFull ); + + (* nummatches)++; + + *((u_int32_t *)attributesBuffer) = packedBufferSize; /* Store length of fixed + var block */ + + err = uiomove( (caddr_t)attributesBuffer, packedBufferSize, a_uio ); +exit: + cat_releasedesc(&c_desc); + + return( err ); +} + + +static int +UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, + searchinfospec_t *searchInfo, void *attributeBuffer, int firstblock) +{ + attrgroup_t a; + u_int32_t bufferSize; + boolean_t is_64_bit; + + hfs_assert(searchInfo != NULL); + + is_64_bit = proc_is64bit(current_proc()); + + bufferSize = *((u_int32_t *)attributeBuffer); + if (bufferSize == 0) + return (EINVAL); /* XXX -DJB is a buffer size of zero ever valid for searchfs? 
*/ + + attributeBuffer = (u_int32_t *)attributeBuffer + 1; /* advance past the size */ + + /* + * UnPack common attributes + */ + a = alist->commonattr; + if ( a != 0 ) { + if ( a & ATTR_CMN_NAME ) { + if (firstblock) { + /* Only use the attrreference_t for the first searchparams */ + char *s; + u_int32_t len; + + s = (char*) attributeBuffer + ((attrreference_t *) attributeBuffer)->attr_dataoffset; + len = ((attrreference_t *) attributeBuffer)->attr_length; + + if (len > sizeof(searchInfo->name)) + return (EINVAL); + + + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + size_t ucslen; + /* Convert name to Unicode to match HFS Plus B-Tree names */ + + if (len > 0) { + if (utf8_decodestr((u_int8_t *)s, len-1, (UniChar*)searchInfo->name, &ucslen, + sizeof(searchInfo->name), ':', UTF_DECOMPOSED | UTF_ESCAPE_ILLEGAL)) + return (EINVAL); + + searchInfo->nameLength = ucslen / sizeof(UniChar); + } else { + searchInfo->nameLength = 0; + } + } +#if CONFIG_HFS_STD + else { + /* Convert name to pascal string to match HFS (Standard) B-Tree names */ + + if (len > 0) { + if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, (u_char *)s, (u_char*)searchInfo->name) != 0) + return (EINVAL); + + searchInfo->nameLength = searchInfo->name[0]; + } else { + searchInfo->name[0] = searchInfo->nameLength = 0; + } + } +#endif + } + attributeBuffer = (attrreference_t*) attributeBuffer +1; + } + if ( a & ATTR_CMN_OBJID ) { + searchInfo->nodeID = ((fsobj_id_t *) attributeBuffer)->fid_objno; /* ignore fid_generation */ + attributeBuffer = (fsobj_id_t *)attributeBuffer + 1; + } + if ( a & ATTR_CMN_PAROBJID ) { + searchInfo->parentDirID = ((fsobj_id_t *) attributeBuffer)->fid_objno; /* ignore fid_generation */ + attributeBuffer = (fsobj_id_t *)attributeBuffer + 1; + } + + if ( a & ATTR_CMN_CRTIME ) { + if (is_64_bit) { + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); + searchInfo->creationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->creationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; + } + else { + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->creationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->creationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; + } + } + if ( a & ATTR_CMN_MODTIME ) { + if (is_64_bit) { + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); + searchInfo->modificationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->modificationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; + } + else { + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->modificationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->modificationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; + } + } + if ( a & ATTR_CMN_CHGTIME ) { + if (is_64_bit) { + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); + searchInfo->changeDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->changeDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; + } + else { + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->changeDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->changeDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; + } + } + if ( a & ATTR_CMN_ACCTIME ) { + if 
(is_64_bit) { + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); + searchInfo->accessDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->accessDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; + } + else { + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->accessDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->accessDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; + } + } + if ( a & ATTR_CMN_BKUPTIME ) { + if (is_64_bit) { + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); + searchInfo->lastBackupDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->lastBackupDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; + } + else { + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->lastBackupDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->lastBackupDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; + } + } + if ( a & ATTR_CMN_FNDRINFO ) { + bcopy( attributeBuffer, searchInfo->finderInfo, sizeof(searchInfo->finderInfo) ); + attributeBuffer = (u_int8_t *)attributeBuffer + 32; + } + if ( a & ATTR_CMN_OWNERID ) { + searchInfo->uid = *((uid_t *)attributeBuffer); + attributeBuffer = (uid_t *)attributeBuffer + 1; + } + if ( a & ATTR_CMN_GRPID ) { + searchInfo->gid = *((gid_t *)attributeBuffer); + attributeBuffer = (gid_t *)attributeBuffer + 1; + } + if ( a & ATTR_CMN_ACCESSMASK ) { + searchInfo->mask = *((mode_t *)attributeBuffer); + attributeBuffer = (mode_t *)attributeBuffer + 1; + } + if ( a & ATTR_CMN_FILEID ) { + searchInfo->nodeID = (u_int32_t)*((u_int64_t *) attributeBuffer); + attributeBuffer = (u_int64_t *)attributeBuffer + 1; + } + if ( a & ATTR_CMN_PARENTID ) { + searchInfo->parentDirID = (u_int32_t)*((u_int64_t *) attributeBuffer); + attributeBuffer = (u_int64_t *)attributeBuffer + 1; + } + } + + a = alist->dirattr; + if ( a != 0 ) { + if ( a & ATTR_DIR_ENTRYCOUNT ) { + searchInfo->d.numFiles = *((u_int32_t *)attributeBuffer); + attributeBuffer = (u_int32_t *)attributeBuffer + 1; + } + } + + a = alist->fileattr; + if ( a != 0 ) { + if ( a & ATTR_FILE_DATALENGTH ) { + searchInfo->f.dataLogicalLength = *((off_t *)attributeBuffer); + attributeBuffer = (off_t *)attributeBuffer + 1; + } + if ( a & ATTR_FILE_DATAALLOCSIZE ) { + searchInfo->f.dataPhysicalLength = *((off_t *)attributeBuffer); + attributeBuffer = (off_t *)attributeBuffer + 1; + } + if ( a & ATTR_FILE_RSRCLENGTH ) { + searchInfo->f.resourceLogicalLength = *((off_t *)attributeBuffer); + attributeBuffer = (off_t *)attributeBuffer + 1; + } + if ( a & ATTR_FILE_RSRCALLOCSIZE ) { + searchInfo->f.resourcePhysicalLength = *((off_t *)attributeBuffer); + attributeBuffer = (off_t *)attributeBuffer + 1; + } + } + + return (0); +} +#endif /* CONFIG_SEARCHFS */ diff --git a/core/hfs_unistr.h b/core/hfs_unistr.h new file mode 100644 index 0000000..5b300a2 --- /dev/null +++ b/core/hfs_unistr.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __HFS_UNISTR__ +#define __HFS_UNISTR__ + +#include + +/* + * hfs_unitstr.h + * + * This file contains definition of the unicode string used for HFS Plus + * files and folder names, as described by the on-disk format. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef _HFSUNISTR255_DEFINED_ +#define _HFSUNISTR255_DEFINED_ +/* Unicode strings are used for HFS Plus file and folder names */ +struct HFSUniStr255 { + u_int16_t length; /* number of unicode characters */ + u_int16_t unicode[255]; /* unicode characters */ +} __attribute__((aligned(2), packed)); +typedef struct HFSUniStr255 HFSUniStr255; +typedef const HFSUniStr255 *ConstHFSUniStr255Param; +#endif /* _HFSUNISTR255_DEFINED_ */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* __HFS_UNISTR__ */ diff --git a/core/hfs_vfsops.c b/core/hfs_vfsops.c new file mode 100644 index 0000000..6ceab54 --- /dev/null +++ b/core/hfs_vfsops.c @@ -0,0 +1,4751 @@ +/* + * Copyright (c) 1999-2017 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. 
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * hfs_vfsops.c + * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95 + * + * (c) Copyright 1997-2002 Apple Inc. All rights reserved. + * + * hfs_vfsops.c -- VFS layer for loadable HFS file system. 
+ * + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* for parsing boot-args */ +#include + + +#include + +#include "hfs_journal.h" + +#include +#include "hfs_mount.h" + +#include +#include + +#include "hfs_iokit.h" +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_dbg.h" +#include "hfs_endian.h" +#include "hfs_hotfiles.h" +#include "hfs_quota.h" +#include "hfs_btreeio.h" +#include "hfs_kdebug.h" +#include "hfs_cprotect.h" + +#include "FileMgrInternal.h" +#include "BTreesInternal.h" + +#define HFS_MOUNT_DEBUG 1 + +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; + +lck_grp_attr_t * hfs_group_attr; +lck_attr_t * hfs_lock_attr; +lck_grp_t * hfs_mutex_group; +lck_grp_t * hfs_rwlock_group; +lck_grp_t * hfs_spinlock_group; + +// variables to manage HFS kext retain count -- only supported on Macs +#if TARGET_OS_OSX +int hfs_active_mounts = 0; +#endif + +extern struct vnodeopv_desc hfs_vnodeop_opv_desc; + +#if CONFIG_HFS_STD +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); +#endif + +/* not static so we can re-use in hfs_readwrite.c for build_path calls */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); + +static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); +static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); +static int hfs_flushfiles(struct mount *, int, struct proc *); +static int hfs_init(struct vfsconf *vfsp); +static void hfs_locks_destroy(struct hfsmount *hfsmp); +static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); +static int hfs_start(struct mount *mp, int flags, vfs_context_t context); +static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); +static void hfs_syncer_free(struct hfsmount *hfsmp); + +void hfs_initialize_allocator (struct hfsmount *hfsmp); +int hfs_teardown_allocator (struct hfsmount *hfsmp); + +int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); +int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context); +int hfs_reload(struct mount *mp); +int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context); +int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context); +int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context); +int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); + +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); + +#if HFS_LEAK_DEBUG +#include +#endif + +/* + * VFS Operations. 
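+ *
+ * These entry points are exported to the VFS layer through a
+ * struct vfsops table.  A hedged sketch of that wiring (the actual
+ * table may name additional operations):
+ *
+ *	struct vfsops hfs_vfsops = {
+ *		.vfs_mount   = hfs_mount,
+ *		.vfs_start   = hfs_start,
+ *		.vfs_unmount = hfs_unmount,
+ *		.vfs_sync    = hfs_sync,
+ *		.vfs_vget    = hfs_vfs_vget,
+ *		.vfs_fhtovp  = hfs_fhtovp,
+ *		.vfs_vptofh  = hfs_vptofh,
+ *		.vfs_init    = hfs_init,
+ *		.vfs_sysctl  = hfs_sysctl,
+ *	};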
+ * + * mount system call + */ + +int +hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context) +{ + +#if HFS_LEAK_DEBUG + +#warning HFS_LEAK_DEBUG is on + + hfs_alloc_trace_enable(); + +#endif + + struct proc *p = vfs_context_proc(context); + struct hfsmount *hfsmp = NULL; + struct hfs_mount_args args; + int retval = E_NONE; + u_int32_t cmdflags; + + if (data && (retval = copyin(data, (caddr_t)&args, sizeof(args)))) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: copyin returned %d for fs\n", retval); + } + return (retval); + } + cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS; + if (cmdflags & MNT_UPDATE) { + hfs_assert(data); + + hfsmp = VFSTOHFS(mp); + + /* Reload incore data after an fsck. */ + if (cmdflags & MNT_RELOAD) { + if (vfs_isrdonly(mp)) { + int error = hfs_reload(mp); + if (error && HFS_MOUNT_DEBUG) { + printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN); + } + return error; + } + else { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN); + } + return (EINVAL); + } + } + + /* Change to a read-only file system. */ + if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && + vfs_isrdonly(mp)) { + int flags; + + /* Set flag to indicate that a downgrade to read-only + * is in progress and therefore block any further + * modifications to the file system. + */ + hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_thread = current_thread(); + hfs_unlock_global (hfsmp); + hfs_syncer_free(hfsmp); + + /* use hfs_sync to push out System (btree) files */ + retval = hfs_sync(mp, MNT_WAIT, context); + if (retval && ((cmdflags & MNT_FORCE) == 0)) { + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_thread = NULL; + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN); + } + goto out; + } + + flags = WRITECLOSE; + if (cmdflags & MNT_FORCE) + flags |= FORCECLOSE; + + if ((retval = hfs_flushfiles(mp, flags, p))) { + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_thread = NULL; + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN); + } + goto out; + } + + /* mark the volume cleanly unmounted */ + hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask; + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + hfsmp->hfs_flags |= HFS_READ_ONLY; + + /* + * Close down the journal. + * + * NOTE: It is critically important to close down the journal + * and have it issue all pending I/O prior to calling VNOP_FSYNC below. + * In a journaled environment it is expected that the journal be + * the only actor permitted to issue I/O for metadata blocks in HFS. + * If we were to call VNOP_FSYNC prior to closing down the journal, + * we would inadvertantly issue (and wait for) the I/O we just + * initiated above as part of the flushvolumeheader call. + * + * To avoid this, we follow the same order of operations as in + * unmount and issue the journal_close prior to calling VNOP_FSYNC. + */ + + if (hfsmp->jnl) { + hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + + journal_close(hfsmp->jnl); + hfsmp->jnl = NULL; + + // Note: we explicitly don't want to shutdown + // access to the jvp because we may need + // it later if we go back to being read-write. 
+ + hfs_unlock_global (hfsmp); + + vfs_clearflags(hfsmp->hfs_mp, MNT_JOURNALED); + } + + /* + * Write out any pending I/O still outstanding against the device node + * now that the journal has been closed. + */ + if (retval == 0) { + vnode_get(hfsmp->hfs_devvp); + retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); + vnode_put(hfsmp->hfs_devvp); + } + + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN); + } + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_thread = NULL; + hfsmp->hfs_flags &= ~HFS_READ_ONLY; + goto out; + } + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfsmp->hfs_summary_table) { + int err = 0; + /* + * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress + */ + if (hfsmp->hfs_allocation_vp) { + err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + hfs_free(hfsmp->hfs_summary_table, hfsmp->hfs_summary_bytes); + hfsmp->hfs_summary_table = NULL; + hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; + if (err == 0 && hfsmp->hfs_allocation_vp){ + hfs_unlock (VTOC(hfsmp->hfs_allocation_vp)); + } + } + } + + hfsmp->hfs_downgrading_thread = NULL; + } + + /* Change to a writable file system. */ + if (vfs_iswriteupgrade(mp)) { + /* + * On inconsistent disks, do not allow read-write mount + * unless it is the boot volume being mounted. + */ + if (!(vfs_flags(mp) & MNT_ROOTFS) && + (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN)); + } + retval = EINVAL; + goto out; + } + + // If the journal was shut-down previously because we were + // asked to be read-only, let's start it back up again now + + if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) + && hfsmp->jnl == NULL + && hfsmp->jvp != NULL) { + int jflags; + + if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) { + jflags = JOURNAL_RESET; + } else { + jflags = 0; + } + + hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + + /* We provide the mount point twice here: The first is used as + * an opaque argument to be passed back when hfs_sync_metadata + * is called. The second is provided to the throttling code to + * indicate which mount's device should be used when accounting + * for metadata writes. + */ + hfsmp->jnl = journal_open(hfsmp->jvp, + hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, + hfsmp->jnl_size, + hfsmp->hfs_devvp, + hfsmp->hfs_logical_block_size, + jflags, + 0, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + + /* + * Set up the trim callback function so that we can add + * recently freed extents to the free extent cache once + * the transaction that freed them is written to the + * journal on disk. + */ + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + + hfs_unlock_global (hfsmp); + + if (hfsmp->jnl == NULL) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN)); + } + retval = EINVAL; + goto out; + } else { + hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET; + vfs_setflags(hfsmp->hfs_mp, MNT_JOURNALED); + } + } + + /* See if we need to erase unused Catalog nodes due to . 
*/ + retval = hfs_erase_unused_nodes(hfsmp); + if (retval != E_NONE) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN); + } + goto out; + } + + /* If this mount point was downgraded from read-write + * to read-only, clear that information as we are now + * moving back to read-write. + */ + hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE; + hfsmp->hfs_downgrading_thread = NULL; + + /* mark the volume dirty (clear clean unmount bit) */ + hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask; + + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + if (retval != E_NONE) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN); + } + goto out; + } + + /* Only clear HFS_READ_ONLY after a successful write */ + hfsmp->hfs_flags &= ~HFS_READ_ONLY; + + + if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) { + /* Setup private/hidden directories for hardlinks. */ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); + + hfs_remove_orphans(hfsmp); + + /* + * Since we're upgrading to a read-write mount, allow + * hot file clustering if conditions allow. + * + * Note: this normally only would happen if you booted + * single-user and upgraded the mount to read-write + * + * Note: at this point we are not allowed to fail the + * mount operation because the HotFile init code + * in hfs_recording_init() will lookup vnodes with + * VNOP_LOOKUP() which hangs vnodes off the mount + * (and if we were to fail, VFS is not prepared to + * clean that up at this point. Since HotFiles are + * optional, this is not a big deal. + */ + if (ISSET(hfsmp->hfs_flags, HFS_METADATA_ZONE) + && (!ISSET(hfsmp->hfs_flags, HFS_SSD) + || ISSET(hfsmp->hfs_flags, HFS_CS_HOTFILE_PIN))) { + hfs_recording_init(hfsmp); + } + /* Force ACLs on HFS+ file systems. */ + if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) { + vfs_setextendedsecurity(HFSTOVFS(hfsmp)); + } + } + } + + /* Update file system parameters. */ + retval = hfs_changefs(mp, &args); + if (retval && HFS_MOUNT_DEBUG) { + printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN); + } + + } else /* not an update request */ { + if (devvp == NULL) { + retval = EINVAL; + goto out; + } + /* Set the mount flag to indicate that we support volfs */ + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); + + retval = hfs_mountfs(devvp, mp, data ? &args : NULL, 0, context); + if (retval) { + const char *name = vnode_getname(devvp); + printf("hfs_mount: hfs_mountfs returned error=%d for device %s\n", retval, (name ? name : "unknown-dev")); + if (name) { + vnode_putname(name); + } + goto out; + } + + /* After hfs_mountfs succeeds, we should have valid hfsmp */ + hfsmp = VFSTOHFS(mp); + + /* Set up the maximum defrag file size */ + hfsmp->hfs_defrag_max = HFS_INITIAL_DEFRAG_SIZE; + + + if (!data) { + // Root mount + + hfsmp->hfs_uid = UNKNOWNUID; + hfsmp->hfs_gid = UNKNOWNGID; + hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */ + hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */ + + /* Establish the free block reserve. 
*/ + hfsmp->reserveBlocks = ((u_int64_t)hfsmp->totalBlocks * HFS_MINFREE) / 100; + hfsmp->reserveBlocks = MIN(hfsmp->reserveBlocks, HFS_MAXRESERVE / hfsmp->blockSize); + } +#if TARGET_OS_OSX + // increment kext retain count + OSIncrementAtomic(&hfs_active_mounts); + OSKextRetainKextWithLoadTag(OSKextGetCurrentLoadTag()); + if (hfs_active_mounts <= 0 && panic_on_assert) + panic("hfs_mount: error - kext resource count is non-positive: %d but at least one active mount\n", hfs_active_mounts); +#endif + } + +out: + if (retval == 0) { + (void)hfs_statfs(mp, vfs_statfs(mp), context); + } + return (retval); +} + + +struct hfs_changefs_cargs { + struct hfsmount *hfsmp; + int namefix; + int permfix; + int permswitch; +}; + +static int +hfs_changefs_callback(struct vnode *vp, void *cargs) +{ + ExtendedVCB *vcb; + struct cnode *cp; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct hfs_changefs_cargs *args; + int lockflags; + int error; + + args = (struct hfs_changefs_cargs *)cargs; + + cp = VTOC(vp); + vcb = HFSTOVCB(args->hfsmp); + + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(args->hfsmp, &cp->c_desc, 0, 0, &cndesc, &cnattr, NULL, NULL); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (error) { + /* + * If we couldn't find this guy skip to the next one + */ + if (args->namefix) + cache_purge(vp); + + return (VNODE_RETURNED); + } + /* + * Get the real uid/gid and perm mask from disk. + */ + if (args->permswitch || args->permfix) { + cp->c_uid = cnattr.ca_uid; + cp->c_gid = cnattr.ca_gid; + cp->c_mode = cnattr.ca_mode; + } + /* + * If we're switching name converters then... + * Remove the existing entry from the namei cache. + * Update name to one based on new encoder. + */ + if (args->namefix) { + cache_purge(vp); + replace_desc(cp, &cndesc); + + if (cndesc.cd_cnid == kHFSRootFolderID) { + strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1); + cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding; + } + } else { + cat_releasedesc(&cndesc); + } + return (VNODE_RETURNED); +} + +/* Change fs mount parameters */ +static int +hfs_changefs(struct mount *mp, struct hfs_mount_args *args) +{ + int retval = 0; + int namefix, permfix, permswitch; + struct hfsmount *hfsmp; + ExtendedVCB *vcb; + struct hfs_changefs_cargs cargs; + u_int32_t mount_flags; + +#if CONFIG_HFS_STD + u_int32_t old_encoding = 0; + hfs_to_unicode_func_t get_unicode_func; + unicode_to_hfs_func_t get_hfsname_func = NULL; +#endif + + hfsmp = VFSTOHFS(mp); + vcb = HFSTOVCB(hfsmp); + mount_flags = (unsigned int)vfs_flags(mp); + + hfsmp->hfs_flags |= HFS_IN_CHANGEFS; + + permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) && + ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) || + (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) && + (mount_flags & MNT_UNKNOWNPERMISSIONS))); + + /* The root filesystem must operate with actual permissions: */ + if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) { + vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */ + retval = EINVAL; + goto exit; + } + if (mount_flags & MNT_UNKNOWNPERMISSIONS) + hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; + else + hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS; + + namefix = permfix = 0; + + /* + * Tracking of hot files requires up-to-date access times. So if + * access time updates are disabled, we must also disable hot files. 
+ */ + if (mount_flags & MNT_NOATIME) { + (void) hfs_recording_suspend(hfsmp); + } + + /* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */ + if (args->hfs_timezone.tz_minuteswest != VNOVAL) { + gTimeZone = args->hfs_timezone; + } + + /* Change the default uid, gid and/or mask */ + if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) { + hfsmp->hfs_uid = args->hfs_uid; + if (vcb->vcbSigWord == kHFSPlusSigWord) + ++permfix; + } + if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) { + hfsmp->hfs_gid = args->hfs_gid; + if (vcb->vcbSigWord == kHFSPlusSigWord) + ++permfix; + } + if (args->hfs_mask != (mode_t)VNOVAL) { + if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) { + hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS; + hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS; + if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES)) + hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE); + if (vcb->vcbSigWord == kHFSPlusSigWord) + ++permfix; + } + } + +#if CONFIG_HFS_STD + /* Change the hfs encoding value (hfs only) */ + if ((vcb->vcbSigWord == kHFSSigWord) && + (args->hfs_encoding != (u_int32_t)VNOVAL) && + (hfsmp->hfs_encoding != args->hfs_encoding)) { + + retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func); + if (retval) + goto exit; + + /* + * Connect the new hfs_get_unicode converter but leave + * the old hfs_get_hfsname converter in place so that + * we can lookup existing vnodes to get their correctly + * encoded names. + * + * When we're all finished, we can then connect the new + * hfs_get_hfsname converter and release our interest + * in the old converters. + */ + hfsmp->hfs_get_unicode = get_unicode_func; + old_encoding = hfsmp->hfs_encoding; + hfsmp->hfs_encoding = args->hfs_encoding; + ++namefix; + } +#endif + + if (!(namefix || permfix || permswitch)) + goto exit; + + /* XXX 3762912 hack to support HFS filesystem 'owner' */ + if (permfix) { + vfs_setowner(mp, + hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid, + hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid); + } + + /* + * For each active vnode fix things that changed + * + * Note that we can visit a vnode more than once + * and we can race with fsync. + * + * hfs_changefs_callback will be called for each vnode + * hung off of this mount point + * + * The vnode will be properly referenced and unreferenced + * around the callback + */ + cargs.hfsmp = hfsmp; + cargs.namefix = namefix; + cargs.permfix = permfix; + cargs.permswitch = permswitch; + + vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs); + +#if CONFIG_HFS_STD + /* + * If we're switching name converters we can now + * connect the new hfs_get_hfsname converter and + * release our interest in the old converters. 
+ */ + if (namefix) { + /* HFS standard only */ + hfsmp->hfs_get_hfsname = get_hfsname_func; + vcb->volumeNameEncodingHint = args->hfs_encoding; + (void) hfs_relconverter(old_encoding); + } +#endif + +exit: + hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS; + return (retval); +} + + +struct hfs_reload_cargs { + struct hfsmount *hfsmp; + int error; +}; + +static int +hfs_reload_callback(struct vnode *vp, void *cargs) +{ + struct cnode *cp; + struct hfs_reload_cargs *args; + int lockflags; + + args = (struct hfs_reload_cargs *)cargs; + /* + * flush all the buffers associated with this node + */ + (void) buf_invalidateblks(vp, 0, 0, 0); + + cp = VTOC(vp); + /* + * Remove any directory hints + */ + if (vnode_isdir(vp)) + hfs_reldirhints(cp, 0); + + /* + * Re-read cnode data for all active vnodes (non-metadata files). + */ + if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) { + struct cat_fork *datafork; + struct cat_desc desc; + + datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL; + + /* lookup by fileID since name could have changed */ + lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork); + hfs_systemfile_unlock(args->hfsmp, lockflags); + if (args->error) { + return (VNODE_RETURNED_DONE); + } + + /* update cnode's catalog descriptor */ + (void) replace_desc(cp, &desc); + } + return (VNODE_RETURNED); +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. + * + * Things to do to update the mount: + * invalidate all cached meta-data. + * invalidate all inactive vnodes. + * invalidate all cached file data. + * re-read volume header from disk. + * re-load meta-file info (extents, file size). + * re-load B-tree header data. + * re-read cnode data for all active vnodes. + */ +int +hfs_reload(struct mount *mountp) +{ + register struct vnode *devvp; + struct buf *bp; + int error, i; + struct hfsmount *hfsmp; + struct HFSPlusVolumeHeader *vhp; + ExtendedVCB *vcb; + struct filefork *forkp; + struct cat_desc cndesc; + struct hfs_reload_cargs args; + daddr64_t priIDSector; + + hfsmp = VFSTOHFS(mountp); + vcb = HFSTOVCB(hfsmp); + + if (vcb->vcbSigWord == kHFSSigWord) + return (EINVAL); /* rooting from HFS is not supported! */ + + /* + * Invalidate all cached meta-data. + */ + devvp = hfsmp->hfs_devvp; + if (buf_invalidateblks(devvp, 0, 0, 0)) + panic("hfs_reload: dirty1"); + + args.hfsmp = hfsmp; + args.error = 0; + /* + * hfs_reload_callback will be called for each vnode + * hung off of this mount point that can't be recycled... + * vnode_iterate will recycle those that it can (the VNODE_RELOAD option) + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args); + + if (args.error) + return (args.error); + + /* + * Re-read VolumeHeader from disk. 
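+	 * For reference: the HFS+ volume header lives at byte offset 1024
+	 * from the start of the volume, so on a pure HFS+ volume
+	 * (hfsPlusIOPosOffset == 0) with 512-byte logical blocks the
+	 * primary header is logical sector 2; hfsPlusIOPosOffset is only
+	 * non-zero for volumes embedded inside an HFS wrapper.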
+ */ + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); + + error = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (error) { + if (bp != NULL) + buf_brelse(bp); + return (error); + } + + vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); + + /* Do a quick sanity check */ + if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord && + SWAP_BE16(vhp->signature) != kHFSXSigWord) || + (SWAP_BE16(vhp->version) != kHFSPlusVersion && + SWAP_BE16(vhp->version) != kHFSXVersion) || + SWAP_BE32(vhp->blockSize) != vcb->blockSize) { + buf_brelse(bp); + return (EIO); + } + + vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate)); + vcb->vcbAtrb = SWAP_BE32 (vhp->attributes); + vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock); + vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize); + vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID); + vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate)); + vcb->vcbWrCnt = SWAP_BE32 (vhp->writeCount); + vcb->vcbFilCnt = SWAP_BE32 (vhp->fileCount); + vcb->vcbDirCnt = SWAP_BE32 (vhp->folderCount); + HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation)); + vcb->totalBlocks = SWAP_BE32 (vhp->totalBlocks); + vcb->freeBlocks = SWAP_BE32 (vhp->freeBlocks); + vcb->encodingsBitmap = SWAP_BE64 (vhp->encodingsBitmap); + bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo)); + vcb->localCreateDate = SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */ + + /* + * Re-load meta-file vnode data (extent info, file size, etc). + */ + forkp = VTOF((struct vnode *)vcb->extentsRefNum); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + forkp->ff_extents[i].startBlock = + SWAP_BE32 (vhp->extentsFile.extents[i].startBlock); + forkp->ff_extents[i].blockCount = + SWAP_BE32 (vhp->extentsFile.extents[i].blockCount); + } + forkp->ff_size = SWAP_BE64 (vhp->extentsFile.logicalSize); + forkp->ff_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks); + forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize); + + + forkp = VTOF((struct vnode *)vcb->catalogRefNum); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + forkp->ff_extents[i].startBlock = + SWAP_BE32 (vhp->catalogFile.extents[i].startBlock); + forkp->ff_extents[i].blockCount = + SWAP_BE32 (vhp->catalogFile.extents[i].blockCount); + } + forkp->ff_size = SWAP_BE64 (vhp->catalogFile.logicalSize); + forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks); + forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize); + + if (hfsmp->hfs_attribute_vp) { + forkp = VTOF(hfsmp->hfs_attribute_vp); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + forkp->ff_extents[i].startBlock = + SWAP_BE32 (vhp->attributesFile.extents[i].startBlock); + forkp->ff_extents[i].blockCount = + SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); + } + forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize); + forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks); + forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize); + } + + forkp = VTOF((struct vnode *)vcb->allocationsRefNum); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + forkp->ff_extents[i].startBlock = + SWAP_BE32 (vhp->allocationFile.extents[i].startBlock); + forkp->ff_extents[i].blockCount = + SWAP_BE32 (vhp->allocationFile.extents[i].blockCount); + } + forkp->ff_size = SWAP_BE64 
(vhp->allocationFile.logicalSize); + forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks); + forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize); + + buf_brelse(bp); + vhp = NULL; + + /* + * Re-load B-tree header data + */ + forkp = VTOF((struct vnode *)vcb->extentsRefNum); + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) + return (error); + + forkp = VTOF((struct vnode *)vcb->catalogRefNum); + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) + return (error); + + if (hfsmp->hfs_attribute_vp) { + forkp = VTOF(hfsmp->hfs_attribute_vp); + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) + return (error); + } + + /* Reload the volume name */ + if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL))) + return (error); + vcb->volumeNameEncodingHint = cndesc.cd_encoding; + bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen)); + cat_releasedesc(&cndesc); + + /* Re-establish private/hidden directories. */ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); + + /* In case any volume information changed to trigger a notification */ + hfs_generate_volume_notifications(hfsmp); + + return (0); +} + +__unused +static uint64_t tv_to_usecs(struct timeval *tv) +{ + return tv->tv_sec * 1000000ULL + tv->tv_usec; +} + +// Returns TRUE if b - a >= usecs +static bool hfs_has_elapsed (const struct timeval *a, + const struct timeval *b, + uint64_t usecs) +{ + struct timeval diff; + timersub(b, a, &diff); + return diff.tv_sec * 1000000ULL + diff.tv_usec >= usecs; +} + +void hfs_syncer(void *arg, __unused wait_result_t wr) +{ + struct hfsmount *hfsmp = arg; + struct timeval now; + + KDBG(HFSDBG_SYNCER | DBG_FUNC_START, obfuscate_addr(hfsmp)); + + hfs_syncer_lock(hfsmp); + + while (ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER) + && timerisset(&hfsmp->hfs_sync_req_oldest)) { + + hfs_syncer_wait(hfsmp, &HFS_META_DELAY_TS); + + if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER) + || !timerisset(&hfsmp->hfs_sync_req_oldest)) { + break; + } + + /* Check to see whether we should flush now: either the oldest + is > HFS_MAX_META_DELAY or HFS_META_DELAY has elapsed since + the request and there are no pending writes. */ + + microuptime(&now); + uint64_t idle_time = vfs_idle_time(hfsmp->hfs_mp); + + if (!hfs_has_elapsed(&hfsmp->hfs_sync_req_oldest, &now, + HFS_MAX_META_DELAY) + && idle_time < HFS_META_DELAY) { + continue; + } + + timerclear(&hfsmp->hfs_sync_req_oldest); + + hfs_syncer_unlock(hfsmp); + + KDBG(HFSDBG_SYNCER_TIMED | DBG_FUNC_START, obfuscate_addr(hfsmp)); + + /* + * We intentionally do a synchronous flush (of the journal or entire volume) here. + * For journaled volumes, this means we wait until the metadata blocks are written + * to both the journal and their final locations (in the B-trees, etc.). + * + * This tends to avoid interleaving the metadata writes with other writes (for + * example, user data, or to the journal when a later transaction notices that + * an earlier transaction has finished its async writes, and then updates the + * journal start in the journal header). Avoiding interleaving of writes is + * very good for performance on simple flash devices like SD cards, thumb drives; + * and on devices like floppies. Since removable devices tend to be this kind of + * simple device, doing a synchronous flush actually improves performance in + * practice. + * + * NOTE: For non-journaled volumes, the call to hfs_sync will also cause dirty + * user data to be written. 
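+		 *
+		 * Rough timing sketch (assuming HFS_META_DELAY is on the order
+		 * of 100 ms and HFS_MAX_META_DELAY on the order of 5 seconds):
+		 * the loop above keeps deferring while writes are still
+		 * arriving, and only flushes once the device has been idle for
+		 * the short delay, or unconditionally once the oldest pending
+		 * sync request has aged past the long delay.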
+ */ + if (hfsmp->jnl) { + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_current()); + } + + KDBG(HFSDBG_SYNCER_TIMED | DBG_FUNC_END); + + hfs_syncer_lock(hfsmp); + } // while (...) + + hfsmp->hfs_syncer_thread = NULL; + hfs_syncer_unlock(hfsmp); + hfs_syncer_wakeup(hfsmp); + + /* BE CAREFUL WHAT YOU ADD HERE: at this point hfs_unmount is free + to continue and therefore hfsmp might be invalid. */ + + KDBG(HFSDBG_SYNCER | DBG_FUNC_END); +} + +/* + * Call into the allocator code and perform a full scan of the bitmap file. + * + * This allows us to TRIM unallocated ranges if needed, and also to build up + * an in-memory summary table of the state of the allocated blocks. + */ +void hfs_scan_blocks (struct hfsmount *hfsmp) { + /* + * Take the allocation file lock. Journal transactions will block until + * we're done here. + */ + + int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* + * We serialize here with the HFS mount lock as we're mounting. + * + * The mount can only proceed once this thread has acquired the bitmap + * lock, since we absolutely do not want someone else racing in and + * getting the bitmap lock, doing a read/write of the bitmap file, + * then us getting the bitmap lock. + * + * To prevent this, the mount thread takes the HFS mount mutex, starts us + * up, then immediately msleeps on the scan_var variable in the mount + * point as a condition variable. This serialization is safe since + * if we race in and try to proceed while they're still holding the lock, + * we'll block trying to acquire the global lock. Since the mount thread + * acquires the HFS mutex before starting this function in a new thread, + * any lock acquisition on our part must be linearizably AFTER the mount thread's. + * + * Note that the HFS mount mutex is always taken last, and always for only + * a short time. In this case, we just take it long enough to mark the + * scan-in-flight bit. + */ + (void) hfs_lock_mount (hfsmp); + hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT; + wakeup((caddr_t) &hfsmp->scan_var); + hfs_unlock_mount (hfsmp); + + /* Initialize the summary table */ + if (hfs_init_summary (hfsmp)) { + printf("hfs: could not initialize summary table for %s\n", hfsmp->vcbVN); + } + + /* + * ScanUnmapBlocks assumes that the bitmap lock is held when you + * call the function. We don't care if there were any errors issuing unmaps. + * + * It will also attempt to build up the summary table for subsequent + * allocator use, as configured. 
+ */ + (void) ScanUnmapBlocks(hfsmp); + + (void) hfs_lock_mount (hfsmp); + hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT; + hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED; + wakeup((caddr_t) &hfsmp->scan_var); + hfs_unlock_mount (hfsmp); + + buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + + hfs_systemfile_unlock(hfsmp, flags); + +} + +/* + * Common code for mount and mountroot + */ +int +hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, + int journal_replay_only, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + int retval = E_NONE; + struct hfsmount *hfsmp = NULL; + struct buf *bp; + dev_t dev; + HFSMasterDirectoryBlock *mdbp = NULL; + int ronly; +#if QUOTA + int i; +#endif + int mntwrapper; + kauth_cred_t cred; + u_int64_t disksize; + daddr64_t log_blkcnt; + u_int32_t log_blksize; + u_int32_t phys_blksize; + u_int32_t minblksize; + u_int32_t iswritable; + daddr64_t mdb_offset; + int isvirtual = 0; + int isroot = !journal_replay_only && args == NULL; + u_int32_t device_features = 0; + int isssd; + + ronly = mp && vfs_isrdonly(mp); + dev = vnode_specrdev(devvp); + cred = p ? vfs_context_ucred(context) : NOCRED; + mntwrapper = 0; + + bp = NULL; + hfsmp = NULL; + mdbp = NULL; + minblksize = kHFSBlockSize; + + /* Advisory locking should be handled at the VFS layer */ + if (mp) + vfs_setlocklocal(mp); + + /* Get the logical block size (treated as physical block size everywhere) */ + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n"); + } + retval = ENXIO; + goto error_exit; + } + if (log_blksize == 0 || log_blksize > 1024*1024*1024) { + printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize); + retval = ENXIO; + goto error_exit; + } + + /* Get the physical block size. */ + retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context); + if (retval) { + if ((retval != ENOTSUP) && (retval != ENOTTY)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n"); + } + retval = ENXIO; + goto error_exit; + } + /* If device does not support this ioctl, assume that physical + * block size is same as logical block size + */ + phys_blksize = log_blksize; + } + if (phys_blksize == 0 || phys_blksize > MAXBSIZE) { + printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize); + retval = ENXIO; + goto error_exit; + } + + /* Switch to 512 byte sectors (temporarily) */ + if (log_blksize > 512) { + u_int32_t size512 = 512; + + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n"); + } + retval = ENXIO; + goto error_exit; + } + } + /* Get the number of 512 byte physical blocks. */ + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { + /* resetting block size may fail if getting block count did */ + (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n"); + } + retval = ENXIO; + goto error_exit; + } + /* Compute an accurate disk size (i.e. 
within 512 bytes) */ + disksize = (u_int64_t)log_blkcnt * (u_int64_t)512; + + /* + * On Tiger it is not necessary to switch the device + * block size to be 4k if there are more than 31-bits + * worth of blocks but to insure compatibility with + * pre-Tiger systems we have to do it. + * + * If the device size is not a multiple of 4K (8 * 512), then + * switching the logical block size isn't going to help because + * we will be unable to write the alternate volume header. + * In this case, just leave the logical block size unchanged. + */ + if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) { + minblksize = log_blksize = 4096; + if (phys_blksize < log_blksize) + phys_blksize = log_blksize; + } + + /* + * The cluster layer is not currently prepared to deal with a logical + * block size larger than the system's page size. (It can handle + * blocks per page, but not multiple pages per block.) So limit the + * logical block size to the page size. + */ + if (log_blksize > PAGE_SIZE) { + log_blksize = PAGE_SIZE; + } + + /* Now switch to our preferred physical block size. */ + if (log_blksize > 512) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n"); + } + retval = ENXIO; + goto error_exit; + } + /* Get the count of physical blocks. */ + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n"); + } + retval = ENXIO; + goto error_exit; + } + } + /* + * At this point: + * minblksize is the minimum physical block size + * log_blksize has our preferred physical block size + * log_blkcnt has the total number of physical blocks + */ + + mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize); + if ((retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), + phys_blksize, cred, &bp))) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval); + } + goto error_exit; + } + mdbp = hfs_malloc(kMDBSize); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize); + buf_brelse(bp); + bp = NULL; + + hfsmp = hfs_mallocz(sizeof(struct hfsmount)); + + hfs_chashinit_finish(hfsmp); + + /* Init the ID lookup hashtable */ + hfs_idhash_init (hfsmp); + + /* + * See if the disk supports unmap (trim). + * + * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field + * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves. + */ + if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) { + if (device_features & DK_FEATURE_UNMAP) { + hfsmp->hfs_flags |= HFS_UNMAP; + } + + if(device_features & DK_FEATURE_BARRIER) + hfsmp->hfs_flags |= HFS_FEATURE_BARRIER; + } + + /* + * See if the disk is a solid state device, too. We need this to decide what to do about + * hotfiles. 
+ */ + if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) { + if (isssd) { + hfsmp->hfs_flags |= HFS_SSD; + } + } + + /* See if the underlying device is Core Storage or not */ + dk_corestorage_info_t cs_info; + memset(&cs_info, 0, sizeof(dk_corestorage_info_t)); + if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, context) == 0) { + hfsmp->hfs_flags |= HFS_CS; + if (isroot && (cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA)) { + hfsmp->hfs_flags |= HFS_CS_METADATA_PIN; + } + if (isroot && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES)) { + hfsmp->hfs_flags |= HFS_CS_HOTFILE_PIN; + hfsmp->hfs_cs_hotfile_size = cs_info.hotfile_size; + } + if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_SWAPFILE)) { + hfsmp->hfs_flags |= HFS_CS_SWAPFILE_PIN; + + struct vfsioattr ioattr; + vfs_ioattr(mp, &ioattr); + ioattr.io_flags |= VFS_IOATTR_FLAGS_SWAPPIN_SUPPORTED; + ioattr.io_max_swappin_available = cs_info.swapfile_pinning; + vfs_setioattr(mp, &ioattr); + } + } + + /* + * Init the volume information structure + */ + + lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr); + lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr); + lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr); + lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr); + + if (mp) + vfs_setfsprivate(mp, hfsmp); + hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */ + hfsmp->hfs_raw_dev = vnode_specrdev(devvp); + hfsmp->hfs_devvp = devvp; + vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */ + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_logical_block_count = log_blkcnt; + hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt; + hfsmp->hfs_physical_block_size = phys_blksize; + hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize); + hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; + if (ronly) + hfsmp->hfs_flags |= HFS_READ_ONLY; + if (mp && ((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) + hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; + +#if QUOTA + for (i = 0; i < MAXQUOTAS; i++) + dqfileinit(&hfsmp->hfs_qfiles[i]); +#endif + + if (args) { + hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid; + if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID; + hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid; + if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID; + vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ + if (args->hfs_mask != (mode_t)VNOVAL) { + hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS; + if (args->flags & HFSFSMNT_NOXONFILES) { + hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE); + } else { + hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS; + } + } else { + hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */ + hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */ + } + if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER)) + mntwrapper = 1; + } else { + /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */ + if (mp && ((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) { + hfsmp->hfs_uid = UNKNOWNUID; + hfsmp->hfs_gid = UNKNOWNGID; + vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ + hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */ + hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? 
*/ + } + } + + /* Find out if disk media is writable. */ + if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) { + if (iswritable) + hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; + else + hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; + } + + // Reservations + rl_init(&hfsmp->hfs_reserved_ranges[0]); + rl_init(&hfsmp->hfs_reserved_ranges[1]); + + // record the current time at which we're mounting this volume + struct timeval tv; + microtime(&tv); + hfsmp->hfs_mount_time = tv.tv_sec; + + /* Mount a standard HFS disk */ + if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && + (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { +#if CONFIG_HFS_STD + /* If only journal replay is requested, exit immediately */ + if (journal_replay_only) { + retval = 0; + goto error_exit; + } + + /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */ + if (vfs_isrdwr(mp)) { + retval = EROFS; + goto error_exit; + } + + printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n"); + + /* Treat it as if it's read-only and not writeable */ + hfsmp->hfs_flags |= HFS_READ_ONLY; + hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; + + if ((vfs_flags(mp) & MNT_ROOTFS)) { + retval = EINVAL; /* Cannot root from HFS standard disks */ + goto error_exit; + } + /* HFS disks can only use 512 byte physical blocks */ + if (log_blksize > kHFSBlockSize) { + log_blksize = kHFSBlockSize; + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { + retval = ENXIO; + goto error_exit; + } + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { + retval = ENXIO; + goto error_exit; + } + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_logical_block_count = log_blkcnt; + hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt; + hfsmp->hfs_physical_block_size = log_blksize; + hfsmp->hfs_log_per_phys = 1; + } + if (args) { + hfsmp->hfs_encoding = args->hfs_encoding; + HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding; + + /* establish the timezone */ + gTimeZone = args->hfs_timezone; + } + + retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode, + &hfsmp->hfs_get_hfsname); + if (retval) + goto error_exit; + + retval = hfs_MountHFSVolume(hfsmp, mdbp, p); + if (retval) + (void) hfs_relconverter(hfsmp->hfs_encoding); +#else + /* On platforms where HFS Standard is not supported, deny the mount altogether */ + retval = EINVAL; + goto error_exit; +#endif + + } + else { /* Mount an HFS Plus disk */ + HFSPlusVolumeHeader *vhp; + off_t embeddedOffset; + int jnl_disable = 0; + + /* Get the embedded Volume Header */ + if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) { + embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize; + embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) * + (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); + + /* + * Cooperative Fusion is not allowed on embedded HFS+ + * filesystems (HFS+ inside HFS standard wrapper) + */ + hfsmp->hfs_flags &= ~HFS_CS_METADATA_PIN; + + /* + * If the embedded volume doesn't start on a block + * boundary, then switch the device to a 512-byte + * block size so everything will line up on a block + * boundary. 
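+			 *
+			 * For example (values chosen for illustration only):
+			 * with drAlBlSt = 28 (512-byte blocks), drAlBlkSiz =
+			 * 8192 and drEmbedExtent.startBlock = 3, the offset
+			 * computed above is 28*512 + 3*8192 = 38912 bytes.
+			 * That is not a multiple of a 4096-byte device block,
+			 * so the code below drops to 512-byte logical blocks,
+			 * which always divide the offset evenly.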
+ */ + if ((embeddedOffset % log_blksize) != 0) { + printf("hfs_mountfs: embedded volume offset not" + " a multiple of physical block size (%d);" + " switching to 512\n", log_blksize); + log_blksize = 512; + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, + (caddr_t)&log_blksize, FWRITE, context)) { + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n"); + } + retval = ENXIO; + goto error_exit; + } + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, + (caddr_t)&log_blkcnt, 0, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n"); + } + retval = ENXIO; + goto error_exit; + } + /* Note: relative block count adjustment */ + hfsmp->hfs_logical_block_count *= + hfsmp->hfs_logical_block_size / log_blksize; + + /* Update logical /physical block size */ + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_physical_block_size = log_blksize; + + phys_blksize = log_blksize; + hfsmp->hfs_log_per_phys = 1; + } + + disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) * + (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz); + + hfsmp->hfs_logical_block_count = disksize / log_blksize; + + hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; + + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); + + if (bp) { + buf_markinvalid(bp); + buf_brelse(bp); + bp = NULL; + } + retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval); + } + goto error_exit; + } + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512); + buf_brelse(bp); + bp = NULL; + vhp = (HFSPlusVolumeHeader*) mdbp; + + } + else { /* pure HFS+ */ + embeddedOffset = 0; + vhp = (HFSPlusVolumeHeader*) mdbp; + } + + retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp); + if (retval) + goto error_exit; + + /* + * If allocation block size is less than the physical block size, + * invalidate the buffer read in using native physical block size + * to ensure data consistency. + * + * HFS Plus reserves one allocation block for the Volume Header. + * If the physical size is larger, then when we read the volume header, + * we will also end up reading in the next allocation block(s). + * If those other allocation block(s) is/are modified, and then the volume + * header is modified, the write of the volume header's buffer will write + * out the old contents of the other allocation blocks. + * + * We assume that the physical block size is same as logical block size. + * The physical block size value is used to round down the offsets for + * reading and writing the primary and alternate volume headers. + * + * The same logic is also in hfs_MountHFSPlusVolume to ensure that + * hfs_mountfs, hfs_MountHFSPlusVolume and later are doing the I/Os + * using same block size. + */ + if (SWAP_BE32(vhp->blockSize) < hfsmp->hfs_physical_block_size) { + phys_blksize = hfsmp->hfs_logical_block_size; + hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size; + hfsmp->hfs_log_per_phys = 1; + // There should be one bp associated with devvp in buffer cache. 
+ retval = buf_invalidateblks(devvp, 0, 0, 0); + if (retval) + goto error_exit; + } + + if (isroot && ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0)) { + vfs_set_root_unmounted_cleanly(); + } + + /* + * On inconsistent disks, do not allow read-write mount + * unless it is the boot volume being mounted. We also + * always want to replay the journal if the journal_replay_only + * flag is set because that will (most likely) get the + * disk into a consistent state before fsck_hfs starts + * looking at it. + */ + if (!journal_replay_only + && !(vfs_flags(mp) & MNT_ROOTFS) + && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) + && !(hfsmp->hfs_flags & HFS_READ_ONLY)) { + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: failed to mount non-root inconsistent disk\n"); + } + retval = EINVAL; + goto error_exit; + } + + + // XXXdbg + // + hfsmp->jnl = NULL; + hfsmp->jvp = NULL; + if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) && + args->journal_disable) { + jnl_disable = 1; + } + + // + // We only initialize the journal here if the last person + // to mount this volume was journaling aware. Otherwise + // we delay journal initialization until later at the end + // of hfs_MountHFSPlusVolume() because the last person who + // mounted it could have messed things up behind our back + // (so we need to go find the .journal file, make sure it's + // the right size, re-sync up if it was moved, etc). + // + if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion) + && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask) + && !jnl_disable) { + + // if we're able to init the journal, mark the mount + // point as journaled. + // + if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) { + if (mp) + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + } else { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n"); + } + retval = EINVAL; + goto error_exit; + } + + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!ronly) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n"); + } + + HFSPlusVolumeHeader *jvhp; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + if (mdb_offset == 0) { + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); + } + + bp = NULL; + retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + // if this isn't the root device just bail out. 
+ // If it is the root device we just continue on + // in the hopes that fsck_hfs will be able to + // fix any damage that exists on the volume. + if (mp && !(vfs_flags(mp) & MNT_ROOTFS)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n"); + } + retval = EINVAL; + goto error_exit; + } + } + } + + /* Either the journal is replayed successfully, or there + * was nothing to replay, or no journal exists. In any case, + * return success. + */ + if (journal_replay_only) { + retval = 0; + goto error_exit; + } + +#if CONFIG_HFS_STD + (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname); +#endif + + retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); + /* + * If the backend didn't like our physical blocksize + * then retry with physical blocksize of 512. + */ + if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) { + printf("hfs_mountfs: could not use physical block size " + "(%d) switching to 512\n", log_blksize); + log_blksize = 512; + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n"); + } + retval = ENXIO; + goto error_exit; + } + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n"); + } + retval = ENXIO; + goto error_exit; + } + set_fsblocksize(devvp); + /* Note: relative block count adjustment (in case this is an embedded volume). */ + hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize; + hfsmp->hfs_logical_block_size = log_blksize; + hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize; + + hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size; + + if (hfsmp->jnl && hfsmp->jvp == devvp) { + // close and re-open this with the new block size + journal_close(hfsmp->jnl); + hfsmp->jnl = NULL; + if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { + vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + } else { + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!ronly) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n"); + } + HFSPlusVolumeHeader *jvhp; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + if (mdb_offset == 0) { + mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize)); + } + + bp = NULL; + retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + phys_blksize, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + // if this isn't the root device just bail out. 
+ // If it is the root device we just continue on + // in the hopes that fsck_hfs will be able to + // fix any damage that exists on the volume. + if ( !(vfs_flags(mp) & MNT_ROOTFS)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: hfs_early_journal_init (2) failed \n"); + } + retval = EINVAL; + goto error_exit; + } + } + } + + /* Try again with a smaller block size... */ + retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); + if (retval && HFS_MOUNT_DEBUG) { + printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval); + } + } +#if CONFIG_HFS_STD + if (retval) + (void) hfs_relconverter(0); +#endif + } + + // save off a snapshot of the mtime from the previous mount + // (for matador). + hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime; + + if ( retval ) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mountfs: encountered failure %d \n", retval); + } + goto error_exit; + } + + struct vfsstatfs *vsfs = vfs_statfs(mp); + vsfs->f_fsid.val[0] = dev; + vsfs->f_fsid.val[1] = vfs_typenum(mp); + + vfs_setmaxsymlen(mp, 0); + +#if CONFIG_HFS_STD + if (ISSET(hfsmp->hfs_flags, HFS_STANDARD)) { + /* HFS standard doesn't support extended readdir! */ + mount_set_noreaddirext (mp); + } +#endif + + if (args) { + /* + * Set the free space warning levels for a non-root volume: + * + * Set the "danger" limit to 1% of the volume size or 150MB, whichever is less. + * Set the "warning" limit to 2% of the volume size or 500MB, whichever is less. + * Set the "near warning" limit to 10% of the volume size or 1GB, whichever is less. + * And last, set the "desired" freespace level to to 12% of the volume size or 1.2GB, + * whichever is less. + */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_warninglimit = + MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_nearwarninglimit = + MIN(HFS_NEARLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_NEARLOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_desiredlevel = + MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION); + } else { + /* + * Set the free space warning levels for the root volume: + * + * Set the "danger" limit to 5% of the volume size or 512MB, whichever is less. + * Set the "warning" limit to 10% of the volume size or 1GB, whichever is less. + * Set the "near warning" limit to 10.5% of the volume size or 1.1GB, whichever is less. + * And last, set the "desired" freespace level to to 11% of the volume size or 1.25GB, + * whichever is less. 
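+		 *
+		 * For example, on a 500 GiB root volume with 4 KiB allocation
+		 * blocks totalBlocks is 131,072,000, so the 5% "danger"
+		 * fraction would be 6,553,600 blocks (~25 GiB) while the
+		 * 512 MB cap is only about 131,000 blocks; the MIN() calls
+		 * below therefore pick the byte-based limits, and the
+		 * percentage limits only matter on small volumes.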
+ * + * NOTE: While those are the default limits, KernelEventAgent (as of 3/2016) + * will unilaterally override these to the following on OSX only: + * Danger: 3GB + * Warning: Min (2% of root volume, 10GB), with a floor of 10GB + * Desired: Warning Threshold + 1.5GB + */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_warninglimit = + MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_nearwarninglimit = + MIN(HFS_ROOTNEARLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTNEARLOWDISKTRIGGERFRACTION); + hfsmp->hfs_freespace_notify_desiredlevel = + MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION); + }; + + /* Check if the file system exists on virtual device, like disk image */ + if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) { + if (isvirtual) { + hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE; + } + } + + if (!isroot + && !ISSET(hfsmp->hfs_flags, HFS_VIRTUAL_DEVICE) + && hfs_is_ejectable(vfs_statfs(mp)->f_mntfromname)) { + SET(hfsmp->hfs_flags, HFS_RUN_SYNCER); + } + + const char *dev_name = (hfsmp->hfs_devvp + ? vnode_getname_printable(hfsmp->hfs_devvp) : NULL); + + printf("hfs: mounted %s on device %s\n", + (hfsmp->vcbVN[0] ? (const char*) hfsmp->vcbVN : "unknown"), + dev_name ?: "unknown device"); + + if (dev_name) + vnode_putname_printable(dev_name); + + /* + * Start looking for free space to drop below this level and generate a + * warning immediately if needed: + */ + hfsmp->hfs_notification_conditions = 0; + hfs_generate_volume_notifications(hfsmp); + + if (ronly == 0) { + (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + } + hfs_free(mdbp, kMDBSize); + return (0); + +error_exit: + if (bp) + buf_brelse(bp); + + hfs_free(mdbp, kMDBSize); + + hfs_close_jvp(hfsmp); + + if (hfsmp) { + if (hfsmp->hfs_devvp) { + vnode_rele(hfsmp->hfs_devvp); + } + hfs_locks_destroy(hfsmp); + hfs_delete_chash(hfsmp); + hfs_idhash_destroy (hfsmp); + + hfs_free(hfsmp, sizeof(*hfsmp)); + if (mp) + vfs_setfsprivate(mp, NULL); + } + return (retval); +} + + +/* + * Make a filesystem operational. + * Nothing to do at the moment. + */ +/* ARGSUSED */ +static int +hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) +{ + return (0); +} + + +/* + * unmount system call + */ +int +hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + struct hfsmount *hfsmp = VFSTOHFS(mp); + int retval = E_NONE; + int flags; + int force; + int started_tr = 0; + + flags = 0; + force = 0; + if (mntflags & MNT_FORCE) { + flags |= FORCECLOSE; + force = 1; + } + + const char *dev_name = (hfsmp->hfs_devvp + ? vnode_getname_printable(hfsmp->hfs_devvp) : NULL); + + printf("hfs: unmount initiated on %s on device %s\n", + (hfsmp->vcbVN[0] ? 
(const char*) hfsmp->vcbVN : "unknown"), + dev_name ?: "unknown device"); + + if (dev_name) + vnode_putname_printable(dev_name); + + if ((retval = hfs_flushfiles(mp, flags, p)) && !force) + return (retval); + + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) + (void) hfs_recording_suspend(hfsmp); + + hfs_syncer_free(hfsmp); + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfsmp->hfs_summary_table) { + int err = 0; + /* + * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress + */ + if (hfsmp->hfs_allocation_vp) { + err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + hfs_free(hfsmp->hfs_summary_table, hfsmp->hfs_summary_bytes); + hfsmp->hfs_summary_table = NULL; + hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; + + if (err == 0 && hfsmp->hfs_allocation_vp){ + hfs_unlock (VTOC(hfsmp->hfs_allocation_vp)); + } + + } + } + + /* + * Flush out the b-trees, volume bitmap and Volume Header + */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { + retval = hfs_start_transaction(hfsmp); + if (retval == 0) { + started_tr = 1; + } else if (!force) { + goto err_exit; + } + + if (hfsmp->hfs_startup_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_startup_vp)); + if (retval && !force) + goto err_exit; + } + + if (hfsmp->hfs_attribute_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); + if (retval && !force) + goto err_exit; + } + + (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + if (retval && !force) + goto err_exit; + + (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + if (retval && !force) + goto err_exit; + + if (hfsmp->hfs_allocation_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + if (retval && !force) + goto err_exit; + } + + if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) { + retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p); + if (retval && !force) + goto err_exit; + } + + /* If runtime corruption was detected, indicate that the volume + * was not unmounted cleanly. 
+ */ + if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) { + HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; + } else { + HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; + } + + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + int i; + u_int32_t min_start = hfsmp->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + lck_spin_lock(&hfsmp->vcbFreeExtLock); + for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { + if (hfsmp->vcbFreeExt[i].startBlock < min_start) { + min_start = hfsmp->vcbFreeExt[i].startBlock; + } + } + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + if (min_start < hfsmp->nextAllocation) { + hfsmp->nextAllocation = min_start; + } + } + + retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + if (retval) { + HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; + if (!force) + goto err_exit; /* could not flush everything */ + } + + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; + } + } + + if (hfsmp->jnl) { + hfs_flush(hfsmp, HFS_FLUSH_FULL); + } + + /* + * Invalidate our caches and release metadata vnodes + */ + (void) hfsUnmount(hfsmp, p); + +#if CONFIG_HFS_STD + if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) { + (void) hfs_relconverter(hfsmp->hfs_encoding); + } +#endif + + // XXXdbg + if (hfsmp->jnl) { + journal_close(hfsmp->jnl); + hfsmp->jnl = NULL; + } + + VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); + + hfs_close_jvp(hfsmp); + + /* + * Last chance to dump unreferenced system files. + */ + (void) vflush(mp, NULLVP, FORCECLOSE); + +#if HFS_SPARSE_DEV + /* Drop our reference on the backing fs (if any). */ + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingvp) { + struct vnode * tmpvp; + + hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; + tmpvp = hfsmp->hfs_backingvp; + hfsmp->hfs_backingvp = NULLVP; + vnode_rele(tmpvp); + } +#endif /* HFS_SPARSE_DEV */ + + vnode_rele(hfsmp->hfs_devvp); + + hfs_locks_destroy(hfsmp); + hfs_delete_chash(hfsmp); + hfs_idhash_destroy(hfsmp); + + hfs_assert(TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS]) + && TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS])); + hfs_assert(!hfsmp->lockedBlocks); + + hfs_free(hfsmp, sizeof(*hfsmp)); + + // decrement kext retain count +#if TARGET_OS_OSX + OSDecrementAtomic(&hfs_active_mounts); + OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag()); +#endif + +#if HFS_LEAK_DEBUG && TARGET_OS_OSX + if (hfs_active_mounts == 0) { + if (hfs_dump_allocations()) + Debugger(NULL); + else { + printf("hfs: last unmount and nothing was leaked!\n"); + msleep(hfs_unmount, NULL, PINOD, "hfs_unmount", + &(struct timespec){ 5, 0 }); + } + } +#endif + + return (0); + + err_exit: + if (started_tr) { + hfs_end_transaction(hfsmp); + } + return retval; +} + + +/* + * Return the root of a filesystem. 
+ */ +int hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) +{ + return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0); +} + + +/* + * Do operations associated with quotas + */ +#if !QUOTA +static int +hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context) +{ + return (ENOTSUP); +} +#else +static int +hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + int cmd, type, error; + + if (uid == ~0U) + uid = kauth_cred_getuid(vfs_context_ucred(context)); + cmd = cmds >> SUBCMDSHIFT; + + switch (cmd) { + case Q_SYNC: + case Q_QUOTASTAT: + break; + case Q_GETQUOTA: + if (uid == kauth_cred_getuid(vfs_context_ucred(context))) + break; + /* fall through */ + default: + if ( (error = vfs_context_suser(context)) ) + return (error); + } + + type = cmds & SUBCMDMASK; + if ((u_int)type >= MAXQUOTAS) + return (EINVAL); + if ((error = vfs_busy(mp, LK_NOWAIT)) != 0) + return (error); + + switch (cmd) { + + case Q_QUOTAON: + error = hfs_quotaon(p, mp, type, datap); + break; + + case Q_QUOTAOFF: + error = hfs_quotaoff(p, mp, type); + break; + + case Q_SETQUOTA: + error = hfs_setquota(mp, uid, type, datap); + break; + + case Q_SETUSE: + error = hfs_setuse(mp, uid, type, datap); + break; + + case Q_GETQUOTA: + error = hfs_getquota(mp, uid, type, datap); + break; + + case Q_SYNC: + error = hfs_qsync(mp); + break; + + case Q_QUOTASTAT: + error = hfs_quotastat(mp, type, datap); + break; + + default: + error = EINVAL; + break; + } + vfs_unbusy(mp); + + return (error); +} +#endif /* QUOTA */ + +/* Subtype is composite of bits */ +#define HFS_SUBTYPE_JOURNALED 0x01 +#define HFS_SUBTYPE_CASESENSITIVE 0x02 +/* bits 2 - 6 reserved */ +#define HFS_SUBTYPE_STANDARDHFS 0x80 + +/* + * Get file system statistics. + */ +int +hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context) +{ + ExtendedVCB *vcb = VFSTOVCB(mp); + struct hfsmount *hfsmp = VFSTOHFS(mp); + u_int16_t subtype = 0; + + sbp->f_bsize = (u_int32_t)vcb->blockSize; + sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0); + sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks); + sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0)); + sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1)); + sbp->f_files = (u_int64_t)HFS_MAX_FILES; + sbp->f_ffree = (u_int64_t)hfs_free_cnids(hfsmp); + + /* + * Subtypes (flavors) for HFS + * 0: Mac OS Extended + * 1: Mac OS Extended (Journaled) + * 2: Mac OS Extended (Case Sensitive) + * 3: Mac OS Extended (Case Sensitive, Journaled) + * 4 - 127: Reserved + * 128: Mac OS Standard + * + */ + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ & variants */ + if (hfsmp->jnl) { + subtype |= HFS_SUBTYPE_JOURNALED; + } + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) { + subtype |= HFS_SUBTYPE_CASESENSITIVE; + } + } +#if CONFIG_HFS_STD + else { + /* HFS standard */ + subtype = HFS_SUBTYPE_STANDARDHFS; + } +#endif + sbp->f_fssubtype = subtype; + + return (0); +} + + +// +// XXXdbg -- this is a callback to be used by the journal to +// get meta data blocks flushed out to disk. +// +// XXXdbg -- be smarter and don't flush *every* block on each +// call. try to only flush some so we don't wind up +// being too synchronous. 
+// +void +hfs_sync_metadata(void *arg) +{ + struct mount *mp = (struct mount *)arg; + struct hfsmount *hfsmp; + ExtendedVCB *vcb; + buf_t bp; + int retval; + daddr64_t priIDSector; + hfsmp = VFSTOHFS(mp); + vcb = HFSTOVCB(hfsmp); + + // now make sure the super block is flushed + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); + + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if ((retval != 0 ) && (retval != ENXIO)) { + printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n", + (int)priIDSector, retval); + } + + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + buf_bwrite(bp); + } else if (bp) { + buf_brelse(bp); + } + + /* Note that these I/Os bypass the journal (no calls to journal_start_modify_block) */ + + // the alternate super block... + // XXXdbg - we probably don't need to do this each and every time. + // hfs_btreeio.c:FlushAlternate() should flag when it was + // written... + if (hfsmp->hfs_partition_avh_sector) { + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + /* + * note this I/O can fail if the partition shrank behind our backs! + * So failure should be OK here. + */ + buf_bwrite(bp); + } else if (bp) { + buf_brelse(bp); + } + } + + /* Is the FS's idea of the AVH different than the partition ? */ + if ((hfsmp->hfs_fs_avh_sector) && (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) { + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp); + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + buf_bwrite(bp); + } else if (bp) { + buf_brelse(bp); + } + } + +} + + +struct hfs_sync_cargs { + kauth_cred_t cred; + struct proc *p; + int waitfor; + int error; + int atime_only_syncs; + time_t sync_start_time; +}; + + +static int +hfs_sync_callback(struct vnode *vp, void *cargs) +{ + struct cnode *cp = VTOC(vp); + struct hfs_sync_cargs *args; + int error; + + args = (struct hfs_sync_cargs *)cargs; + + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + return (VNODE_RETURNED); + } + + hfs_dirty_t dirty_state = hfs_is_dirty(cp); + + bool sync = dirty_state == HFS_DIRTY || vnode_hasdirtyblks(vp); + + if (!sync && dirty_state == HFS_DIRTY_ATIME + && args->atime_only_syncs < 256) { + // We only update if the atime changed more than 60s ago + if (args->sync_start_time - cp->c_attr.ca_atime > 60) { + sync = true; + ++args->atime_only_syncs; + } + } + + if (sync) { + error = hfs_fsync(vp, args->waitfor, 0, args->p); + + if (error) + args->error = error; + } else if (cp->c_touch_acctime) + hfs_touchtimes(VTOHFS(vp), cp); + + hfs_unlock(cp); + return (VNODE_RETURNED); +} + + + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. 
+ */ +int +hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) +{ + struct proc *p = vfs_context_proc(context); + struct cnode *cp; + struct hfsmount *hfsmp; + ExtendedVCB *vcb; + struct vnode *meta_vp[4]; + int i; + int error, allerror = 0; + struct hfs_sync_cargs args; + + hfsmp = VFSTOHFS(mp); + + // Back off if hfs_changefs or a freeze is underway + hfs_lock_mount(hfsmp); + if ((hfsmp->hfs_flags & HFS_IN_CHANGEFS) + || hfsmp->hfs_freeze_state != HFS_THAWED) { + hfs_unlock_mount(hfsmp); + return 0; + } + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + hfs_unlock_mount(hfsmp); + return (EROFS); + } + + ++hfsmp->hfs_syncers; + hfs_unlock_mount(hfsmp); + + args.cred = kauth_cred_get(); + args.waitfor = waitfor; + args.p = p; + args.error = 0; + args.atime_only_syncs = 0; + + struct timeval tv; + microtime(&tv); + + args.sync_start_time = tv.tv_sec; + + /* + * hfs_sync_callback will be called for each vnode + * hung off of this mount point... the vnode will be + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args); + + if (args.error) + allerror = args.error; + + vcb = HFSTOVCB(hfsmp); + + meta_vp[0] = vcb->extentsRefNum; + meta_vp[1] = vcb->catalogRefNum; + meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */ + meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */ + + /* Now sync our three metadata files */ + for (i = 0; i < 4; ++i) { + struct vnode *btvp; + + btvp = meta_vp[i];; + if ((btvp==0) || (vnode_mount(btvp) != mp)) + continue; + + /* XXX use hfs_systemfile_lock instead ? */ + (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + cp = VTOC(btvp); + + if (!hfs_is_dirty(cp) && !vnode_hasdirtyblks(btvp)) { + hfs_unlock(VTOC(btvp)); + continue; + } + error = vnode_get(btvp); + if (error) { + hfs_unlock(VTOC(btvp)); + continue; + } + if ((error = hfs_fsync(btvp, waitfor, 0, p))) + allerror = error; + + hfs_unlock(cp); + vnode_put(btvp); + }; + + +#if CONFIG_HFS_STD + /* + * Force stale file system control information to be flushed. + */ + if (vcb->vcbSigWord == kHFSSigWord) { + if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) { + allerror = error; + } + } +#endif + +#if QUOTA + hfs_qsync(mp); +#endif /* QUOTA */ + + hfs_hotfilesync(hfsmp, vfs_context_kernel()); + + /* + * Write back modified superblock. + */ + if (IsVCBDirty(vcb)) { + error = hfs_flushvolumeheader(hfsmp, waitfor == MNT_WAIT ? HFS_FVH_WAIT : 0); + if (error) + allerror = error; + } + + if (hfsmp->jnl) { + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL); + } + + hfs_lock_mount(hfsmp); + boolean_t wake = (!--hfsmp->hfs_syncers + && hfsmp->hfs_freeze_state == HFS_WANT_TO_FREEZE); + hfs_unlock_mount(hfsmp); + if (wake) + wakeup(&hfsmp->hfs_freeze_state); + + return (allerror); +} + + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the cnode id is valid + * - call hfs_vget() to get the locked cnode + * - check for an unallocated cnode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. 
exflagsp and credanonp + */ +static int +hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context) +{ + struct hfsfid *hfsfhp; + struct vnode *nvp; + int result; + + *vpp = NULL; + hfsfhp = (struct hfsfid *)fhp; + + if (fhlen < (int)sizeof(struct hfsfid)) + return (EINVAL); + + result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0); + if (result) { + if (result == ENOENT) + result = ESTALE; + return result; + } + + /* + * We used to use the create time as the gen id of the file handle, + * but it is not static enough because it can change at any point + * via system calls. We still don't have another volume ID or other + * unique identifier to use for a generation ID across reboots that + * persists until the file is removed. Using only the CNID exposes + * us to the potential wrap-around case, but as of 2/2008, it would take + * over 2 months to wrap around if the machine did nothing but allocate + * CNIDs. Using some kind of wrap counter would only be effective if + * each file had the wrap counter associated with it. For now, + * we use only the CNID to identify the file as it's good enough. + */ + + *vpp = nvp; + + hfs_unlock(VTOC(nvp)); + return (0); +} + + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +static int +hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context) +{ + struct cnode *cp; + struct hfsfid *hfsfhp; + + if (ISHFS(VTOVCB(vp))) + return (ENOTSUP); /* hfs standard is not exportable */ + + if (*fhlenp < (int)sizeof(struct hfsfid)) + return (EOVERFLOW); + + cp = VTOC(vp); + hfsfhp = (struct hfsfid *)fhp; + /* only the CNID is used to identify the file now */ + hfsfhp->hfsfid_cnid = htonl(cp->c_fileid); + hfsfhp->hfsfid_gen = htonl(cp->c_fileid); + *fhlenp = sizeof(struct hfsfid); + + return (0); +} + + +/* + * Initialize HFS filesystems, done only once per boot. + * + * HFS is not a kext-based file system. This makes it difficult to find + * out when the last HFS file system was unmounted and call hfs_uninit() + * to deallocate data structures allocated in hfs_init(). Therefore we + * never deallocate memory allocated by lock attribute and group initializations + * in this function. 
+ */ +static int +hfs_init(__unused struct vfsconf *vfsp) +{ + static int done = 0; + + if (done) + return (0); + done = 1; + hfs_chashinit(); + + BTReserveSetup(); + + hfs_lock_attr = lck_attr_alloc_init(); + hfs_group_attr = lck_grp_attr_alloc_init(); + hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); + hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); + hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr); + +#if HFS_COMPRESSION + decmpfs_init(); +#endif + + journal_init(); + + return (0); +} + + +/* + * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount + */ +static void +hfs_locks_destroy(struct hfsmount *hfsmp) +{ + + lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group); + lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); + lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group); + lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group); + + return; +} + + +static int +hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp) +{ + struct hfsmount * hfsmp; + char fstypename[MFSNAMELEN]; + + if (vp == NULL) + return (EINVAL); + + if (!vnode_isvroot(vp)) + return (EINVAL); + + vnode_vfsname(vp, fstypename); + if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0) + return (EINVAL); + + hfsmp = VTOHFS(vp); + + if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) + return (EINVAL); + + *hfsmpp = hfsmp; + + return (0); +} + +// Replace user-space value +static errno_t ureplace(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, + void *data, size_t len) +{ + errno_t error; + if (!oldlenp) + return EFAULT; + if (oldp && *oldlenp < len) + return ENOMEM; + if (newp && newlen != len) + return EINVAL; + *oldlenp = len; + if (oldp) { + error = copyout(data, oldp, len); + if (error) + return error; + } + return newp ? copyin(newp, data, len) : 0; +} + +#define UREPLACE(oldp, oldlenp, newp, newlenp, v) \ + ureplace(oldp, oldlenp, newp, newlenp, &v, sizeof(v)) + +static hfsmount_t *hfs_mount_from_cwd(vfs_context_t ctx) +{ + vnode_t vp = vfs_context_cwd(ctx); + + if (!vp) + return NULL; + + /* + * We could use vnode_tag, but it is probably more future proof to + * compare fstypename. + */ + char fstypename[MFSNAMELEN]; + vnode_vfsname(vp, fstypename); + + if (strcmp(fstypename, "hfs")) + return NULL; + + return VTOHFS(vp); +} + +/* + * HFS filesystem related variables. 
+ */ +int +hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context) +{ + int error; + struct hfsmount *hfsmp; + struct proc *p = NULL; + + /* all sysctl names at this level are terminal */ +#if TARGET_OS_OSX + p = vfs_context_proc(context); + if (name[0] == HFS_ENCODINGBIAS) { + int bias; + + bias = hfs_getencodingbias(); + + error = UREPLACE(oldp, oldlenp, newp, newlen, bias); + if (error || !newp) + return error; + + hfs_setencodingbias(bias); + + return 0; + } else +#endif //OSX + if (name[0] == HFS_EXTEND_FS) { + u_int64_t newsize = 0; + vnode_t vp = vfs_context_cwd(context); + + if (newp == USER_ADDR_NULL || vp == NULLVP + || newlen != sizeof(quad_t) || !oldlenp) + return EINVAL; + if ((error = hfs_getmountpoint(vp, &hfsmp))) + return (error); + + /* Start with the 'size' set to the current number of bytes in the filesystem */ + newsize = ((uint64_t)hfsmp->totalBlocks) * ((uint64_t)hfsmp->blockSize); + + error = UREPLACE(oldp, oldlenp, newp, newlen, newsize); + if (error) + return error; + + return hfs_extendfs(hfsmp, newsize, context); + } else if (name[0] == HFS_ENABLE_JOURNALING) { + // make the file system journaled... + vnode_t jvp; + ExtendedVCB *vcb; + struct cat_attr jnl_attr; + struct cat_attr jinfo_attr; + struct cat_fork jnl_fork; + struct cat_fork jinfo_fork; + buf_t jib_buf; + uint64_t jib_blkno; + uint32_t tmpblkno; + uint64_t journal_byte_offset; + uint64_t journal_size; + vnode_t jib_vp = NULLVP; + struct JournalInfoBlock local_jib; + int err = 0; + void *jnl = NULL; + int lockflags; + + /* Only root can enable journaling */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return (EPERM); + } + if (namelen != 4) + return EINVAL; + hfsmp = hfs_mount_from_cwd(context); + if (!hfsmp) + return EINVAL; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) { + printf("hfs: can't make a plain hfs volume journaled.\n"); + return EINVAL; + } + + if (hfsmp->jnl) { + printf("hfs: volume %s is already journaled!\n", hfsmp->vcbVN); + return EAGAIN; + } + vcb = HFSTOVCB(hfsmp); + + /* Set up local copies of the initialization info */ + tmpblkno = (uint32_t) name[1]; + jib_blkno = (uint64_t) tmpblkno; + journal_byte_offset = (uint64_t) name[2]; + journal_byte_offset *= hfsmp->blockSize; + journal_byte_offset += hfsmp->hfsPlusIOPosOffset; + journal_size = (uint64_t)((unsigned)name[3]); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 || + BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) { + + printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n"); + hfs_systemfile_unlock(hfsmp, lockflags); + return EINVAL; + } + hfs_systemfile_unlock(hfsmp, lockflags); + + // make sure these both exist! + if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0 + || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) { + + return EINVAL; + } + + /* + * At this point, we have a copy of the metadata that lives in the catalog for the + * journal info block. Compare that the journal info block's single extent matches + * that which was passed into this sysctl. + * + * If it is different, deny the journal enable call. 
+ */ + if (jinfo_fork.cf_blocks > 1) { + /* too many blocks */ + return EINVAL; + } + + if (jinfo_fork.cf_extents[0].startBlock != jib_blkno) { + /* Wrong block */ + return EINVAL; + } + + /* + * We want to immediately purge the vnode for the JIB. + * + * Because it was written to from userland, there's probably + * a vnode somewhere in the vnode cache (possibly with UBC backed blocks). + * So we bring the vnode into core, then immediately do whatever + * we can to flush/vclean it out. This is because those blocks will be + * interpreted as user data, which may be treated separately on some platforms + * than metadata. If the vnode is gone, then there cannot be backing blocks + * in the UBC. + */ + if (hfs_vget (hfsmp, jinfo_attr.ca_fileid, &jib_vp, 1, 0)) { + return EINVAL; + } + /* + * Now we have a vnode for the JIB. recycle it. Because we hold an iocount + * on the vnode, we'll just mark it for termination when the last iocount + * (hopefully ours), is dropped. + */ + vnode_recycle (jib_vp); + err = vnode_put (jib_vp); + if (err) { + return EINVAL; + } + + /* Initialize the local copy of the JIB (just like hfs.util) */ + memset (&local_jib, 'Z', sizeof(struct JournalInfoBlock)); + local_jib.flags = SWAP_BE32(kJIJournalInFSMask); + /* Note that the JIB's offset is in bytes */ + local_jib.offset = SWAP_BE64(journal_byte_offset); + local_jib.size = SWAP_BE64(journal_size); + + /* + * Now write out the local JIB. This essentially overwrites the userland + * copy of the JIB. Read it as BLK_META to treat it as a metadata read/write. + */ + jib_buf = buf_getblk (hfsmp->hfs_devvp, + jib_blkno * (hfsmp->blockSize / hfsmp->hfs_logical_block_size), + hfsmp->blockSize, 0, 0, BLK_META); + char* buf_ptr = (char*) buf_dataptr (jib_buf); + + /* Zero out the portion of the block that won't contain JIB data */ + memset (buf_ptr, 0, hfsmp->blockSize); + + bcopy(&local_jib, buf_ptr, sizeof(local_jib)); + if (buf_bwrite (jib_buf)) { + return EIO; + } + + /* Force a flush track cache */ + hfs_flush(hfsmp, HFS_FLUSH_CACHE); + + /* Now proceed with full volume sync */ + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context); + + printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", + (off_t)name[2], (off_t)name[3]); + + // + // XXXdbg - note that currently (Sept, 08) hfs_util does not support + // enabling the journal on a separate device so it is safe + // to just copy hfs_devvp here. If hfs_util gets the ability + // to dynamically enable the journal on a separate device then + // we will have to do the same thing as hfs_early_journal_init() + // to locate and open the journal device. + // + jvp = hfsmp->hfs_devvp; + jnl = journal_create(jvp, journal_byte_offset, journal_size, + hfsmp->hfs_devvp, + hfsmp->hfs_logical_block_size, + 0, + 0, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + + /* + * Set up the trim callback function so that we can add + * recently freed extents to the free extent cache once + * the transaction that freed them is written to the + * journal on disk. + */ + if (jnl) + journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp); + + if (jnl == NULL) { + printf("hfs: FAILED to create the journal!\n"); + return EIO; + } + + hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + + /* + * Flush all dirty metadata buffers. 
+ */ + buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl"); + if (hfsmp->hfs_attribute_vp) + buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl"); + + HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1]; + HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask; + hfsmp->jvp = jvp; + hfsmp->jnl = jnl; + + // save this off for the hack-y check in hfs_remove() + hfsmp->jnl_start = (u_int32_t)name[2]; + hfsmp->jnl_size = (off_t)((unsigned)name[3]); + hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid; + hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid; + + vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + + hfs_unlock_global (hfsmp); + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } + return 0; + } else if (name[0] == HFS_DISABLE_JOURNALING) { + // clear the journaling bit + + /* Only root can disable journaling */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return (EPERM); + } + + hfsmp = hfs_mount_from_cwd(context); + if (!hfsmp) + return EINVAL; + + /* + * Disabling journaling is disallowed on volumes with directory hard links + * because we have not tested the relevant code path. + */ + if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){ + printf("hfs: cannot disable journaling on volumes with directory hardlinks\n"); + return EPERM; + } + + printf("hfs: disabling journaling for %s\n", hfsmp->vcbVN); + + hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + + // Lights out for you buddy! + journal_close(hfsmp->jnl); + hfsmp->jnl = NULL; + + hfs_close_jvp(hfsmp); + vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + hfsmp->jnl_start = 0; + hfsmp->hfs_jnlinfoblkid = 0; + hfsmp->hfs_jnlfileid = 0; + + HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask; + + hfs_unlock_global (hfsmp); + + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT); + + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } + return 0; + } else if (name[0] == VFS_CTL_QUERY) { +#if TARGET_OS_IPHONE + return EPERM; +#else //!TARGET_OS_IPHONE + struct sysctl_req *req; + union union_vfsidctl vc; + struct mount *mp; + struct vfsquery vq; + + req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ + if (req == NULL) { + return EFAULT; + } + + error = SYSCTL_IN(req, &vc, proc_is64bit(p)? 
sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) return (error); + + mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ + if (mp == NULL) return (ENOENT); + + hfsmp = VFSTOHFS(mp); + bzero(&vq, sizeof(vq)); + vq.vq_flags = hfsmp->hfs_notification_conditions; + return SYSCTL_OUT(req, &vq, sizeof(vq));; +#endif // TARGET_OS_IPHONE + } else if (name[0] == HFS_REPLAY_JOURNAL) { + vnode_t devvp = NULL; + int device_fd; + if (namelen != 2) { + return (EINVAL); + } + device_fd = name[1]; + error = file_vnode(device_fd, &devvp); + if (error) { + return error; + } + error = vnode_getwithref(devvp); + if (error) { + file_drop(device_fd); + return error; + } + error = hfs_journal_replay(devvp, context); + file_drop(device_fd); + vnode_put(devvp); + return error; + } +#if DEBUG || TARGET_OS_OSX + else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) { + if (!kauth_cred_issuser(kauth_cred_get())) { + return (EPERM); + } + + int old = hfs_resize_debug; + + int res = UREPLACE(oldp, oldlenp, newp, newlen, hfs_resize_debug); + + if (old != hfs_resize_debug) { + printf("hfs: %s resize debug\n", + hfs_resize_debug ? "enabled" : "disabled"); + } + + return res; + } +#endif // DEBUG || OSX + + return (ENOTSUP); +} + +/* + * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support + * the build_path ioctl. We use it to leverage the code below that updates + * the origin list cache if necessary + */ + +int +hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) +{ + int error; + int lockflags; + struct hfsmount *hfsmp; + + hfsmp = VFSTOHFS(mp); + + error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0); + if (error) + return error; + + /* + * If the look-up was via the object ID (rather than the link ID), + * then we make sure there's a parent here. We can't leave this + * until hfs_vnop_getattr because if there's a problem getting the + * parent at that point, all the caller will do is call + * hfs_vfs_vget again and we'll end up in an infinite loop. + */ + + cnode_t *cp = VTOC(*vpp); + + if (ISSET(cp->c_flag, C_HARDLINK) && ino == cp->c_fileid) { + hfs_lock_always(cp, HFS_SHARED_LOCK); + + if (!hfs_haslinkorigin(cp)) { + if (!hfs_lock_upgrade(cp)) + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + + if (cp->c_cnid == cp->c_fileid) { + /* + * Descriptor is stale, so we need to refresh it. We + * pick the first link. + */ + cnid_t link_id; + + error = hfs_first_link(hfsmp, cp, &link_id); + + if (!error) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, link_id, &cp->c_desc); + hfs_systemfile_unlock(hfsmp, lockflags); + } + } else { + // We'll use whatever link the descriptor happens to have + error = 0; + } + if (!error) + hfs_savelinkorigin(cp, cp->c_parentcnid); + } + + hfs_unlock(cp); + + if (error) { + vnode_put(*vpp); + *vpp = NULL; + } + } + + return error; +} + + +/* + * Look up an HFS object by ID. + * + * The object is returned with an iocount reference and the cnode locked. + * + * If the object is a file then it will represent the data fork. + */ +int +hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted) +{ + struct vnode *vp = NULLVP; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork cnfork; + u_int32_t linkref = 0; + int error; + + /* Check for cnids that should't be exported. 
*/ + if ((cnid < kHFSFirstUserCatalogNodeID) && + (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) { + return (ENOENT); + } + /* Don't export our private directories. */ + if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid || + cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + return (ENOENT); + } + /* + * Check the hash first + */ + vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted); + if (vp) { + *vpp = vp; + return(0); + } + + bzero(&cndesc, sizeof(cndesc)); + bzero(&cnattr, sizeof(cnattr)); + bzero(&cnfork, sizeof(cnfork)); + + /* + * Not in hash, lookup in catalog + */ + if (cnid == kHFSRootParentID) { + static char hfs_rootname[] = "/"; + + cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0]; + cndesc.cd_namelen = 1; + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_cnid = kHFSRootFolderID; + cndesc.cd_flags = CD_ISDIR; + + cnattr.ca_fileid = kHFSRootFolderID; + cnattr.ca_linkcount = 1; + cnattr.ca_entries = 1; + cnattr.ca_dircount = 1; + cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO); + } else { + int lockflags; + cnid_t pid; + const char *nameptr; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + *vpp = NULL; + return (error); + } + + /* + * Check for a raw hardlink inode and save its linkref. + */ + pid = cndesc.cd_parentcnid; + nameptr = (const char *)cndesc.cd_nameptr; + + if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + cndesc.cd_namelen > HFS_INODE_PREFIX_LEN && + (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) { + linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10); + + } else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + cndesc.cd_namelen > HFS_DIRINODE_PREFIX_LEN && + (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) { + linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10); + + } else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + cndesc.cd_namelen > HFS_DELETE_PREFIX_LEN && + (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) { + *vpp = NULL; + cat_releasedesc(&cndesc); + return (ENOENT); /* open unlinked file */ + } + } + + /* + * Finish initializing cnode descriptor for hardlinks. + * + * We need a valid name and parent for reverse lookups. + */ + if (linkref) { + cnid_t lastid; + struct cat_desc linkdesc; + int linkerr = 0; + + cnattr.ca_linkref = linkref; + bzero (&linkdesc, sizeof (linkdesc)); + + /* + * If the caller supplied the raw inode value, then we don't know exactly + * which hardlink they wanted. It's likely that they acquired the raw inode + * value BEFORE the item became a hardlink, in which case, they probably + * want the oldest link. So request the oldest link from the catalog. + * + * Unfortunately, this requires that we iterate through all N hardlinks. On the plus + * side, since we know that we want the last linkID, we can also have this one + * call give us back the name of the last ID, since it's going to have it in-hand... + */ + linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc); + if ((linkerr == 0) && (lastid != 0)) { + /* + * Release any lingering buffers attached to our local descriptor. 
+ * Then copy the name and other business into the cndesc + */ + cat_releasedesc (&cndesc); + bcopy (&linkdesc, &cndesc, sizeof(linkdesc)); + } + /* If it failed, the linkref code will just use whatever it had in-hand below. */ + } + + if (linkref) { + int newvnode_flags = 0; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, + &cnfork, &vp, &newvnode_flags); + if (error == 0) { + VTOC(vp)->c_flag |= C_HARDLINK; + vnode_setmultipath(vp); + } + } else { + int newvnode_flags = 0; + + void *buf = hfs_malloc(MAXPATHLEN); + + /* Supply hfs_getnewvnode with a component name. */ + struct componentname cn = { + .cn_nameiop = LOOKUP, + .cn_flags = ISLASTCN, + .cn_pnlen = MAXPATHLEN, + .cn_namelen = cndesc.cd_namelen, + .cn_pnbuf = buf, + .cn_nameptr = buf + }; + + bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1); + + error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, + &cnfork, &vp, &newvnode_flags); + + if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) { + hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); + } + + hfs_free(buf, MAXPATHLEN); + } + cat_releasedesc(&cndesc); + + *vpp = vp; + if (vp && skiplock) { + hfs_unlock(VTOC(vp)); + } + return (error); +} + + +/* + * Flush out all the files in a filesystem. + */ +static int +#if QUOTA +hfs_flushfiles(struct mount *mp, int flags, struct proc *p) +#else +hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) +#endif /* QUOTA */ +{ + struct hfsmount *hfsmp; + struct vnode *skipvp = NULLVP; + int error; + int accounted_root_usecounts; +#if QUOTA + int i; +#endif + + hfsmp = VFSTOHFS(mp); + + accounted_root_usecounts = 0; +#if QUOTA + /* + * The open quota files have an indirect reference on + * the root directory vnode. We must account for this + * extra reference when doing the intial vflush. + */ + if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { + /* Find out how many quota files we have open. */ + for (i = 0; i < MAXQUOTAS; i++) { + if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP) + ++accounted_root_usecounts; + } + } +#endif /* QUOTA */ + + if (accounted_root_usecounts > 0) { + /* Obtain the root vnode so we can skip over it. */ + skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0); + } + + error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags); + if (error != 0) + return(error); + + error = vflush(mp, skipvp, SKIPSYSTEM | flags); + + if (skipvp) { + /* + * See if there are additional references on the + * root vp besides the ones obtained from the open + * quota files and CoreStorage. + */ + if ((error == 0) && + (vnode_isinuse(skipvp, accounted_root_usecounts))) { + error = EBUSY; /* root directory is still open */ + } + hfs_unlock(VTOC(skipvp)); + /* release the iocount from the hfs_chash_getvnode call above. */ + vnode_put(skipvp); + } + if (error && (flags & FORCECLOSE) == 0) + return (error); + +#if QUOTA + if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { + for (i = 0; i < MAXQUOTAS; i++) { + if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP) + continue; + hfs_quotaoff(p, mp, i); + } + } +#endif /* QUOTA */ + + if (skipvp) { + error = vflush(mp, NULLVP, SKIPSYSTEM | flags); + } + + return (error); +} + +/* + * Update volume encoding bitmap (HFS Plus only) + * + * Mark a legacy text encoding as in-use (as needed) + * in the volume header of this HFS+ filesystem. 
+ */ +void +hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) +{ +#define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */ +#define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */ + + u_int32_t index; + + switch (encoding) { + case kTextEncodingMacUkrainian: + index = kIndexMacUkrainian; + break; + case kTextEncodingMacFarsi: + index = kIndexMacFarsi; + break; + default: + index = encoding; + break; + } + + /* Only mark the encoding as in-use if it wasn't already set */ + if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) { + hfs_lock_mount (hfsmp); + hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index); + MarkVCBDirty(hfsmp); + hfs_unlock_mount(hfsmp); + } +} + +/* + * Update volume stats + * + * On journal volumes this will cause a volume header flush + */ +int +hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) +{ + struct timeval tv; + + microtime(&tv); + + hfs_lock_mount (hfsmp); + + MarkVCBDirty(hfsmp); + hfsmp->hfs_mtime = tv.tv_sec; + + switch (op) { + case VOL_UPDATE: + break; + case VOL_MKDIR: + if (hfsmp->hfs_dircount != 0xFFFFFFFF) + ++hfsmp->hfs_dircount; + if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) + ++hfsmp->vcbNmRtDirs; + break; + case VOL_RMDIR: + if (hfsmp->hfs_dircount != 0) + --hfsmp->hfs_dircount; + if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) + --hfsmp->vcbNmRtDirs; + break; + case VOL_MKFILE: + if (hfsmp->hfs_filecount != 0xFFFFFFFF) + ++hfsmp->hfs_filecount; + if (inroot && hfsmp->vcbNmFls != 0xFFFF) + ++hfsmp->vcbNmFls; + break; + case VOL_RMFILE: + if (hfsmp->hfs_filecount != 0) + --hfsmp->hfs_filecount; + if (inroot && hfsmp->vcbNmFls != 0xFFFF) + --hfsmp->vcbNmFls; + break; + } + + hfs_unlock_mount (hfsmp); + + if (hfsmp->jnl) { + hfs_flushvolumeheader(hfsmp, 0); + } + + return (0); +} + + +#if CONFIG_HFS_STD +/* HFS Standard MDB flush */ +static int +hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) +{ + ExtendedVCB *vcb = HFSTOVCB(hfsmp); + struct filefork *fp; + HFSMasterDirectoryBlock *mdb; + struct buf *bp = NULL; + int retval; + int sector_size; + ByteCount namelen; + + sector_size = hfsmp->hfs_logical_block_size; + retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp); + if (retval) { + if (bp) + buf_brelse(bp); + return retval; + } + + hfs_lock_mount (hfsmp); + + mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size)); + + mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime))); + mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod))); + mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb); + mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls); + mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation); + mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz); + mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID); + mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks); + + namelen = strlen((char *)vcb->vcbVN); + retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN); + /* Retry with MacRoman in case that's how it was exported. 
*/ + if (retval) + retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN); + + mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp))); + mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt); + mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs); + mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt); + mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt); + + bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo)); + + fp = VTOF(vcb->extentsRefNum); + mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock); + mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount); + mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock); + mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount); + mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock); + mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); + mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); + mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + + fp = VTOF(vcb->catalogRefNum); + mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock); + mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount); + mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock); + mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount); + mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock); + mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); + mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); + mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + + MarkVCBClean( vcb ); + + hfs_unlock_mount (hfsmp); + + /* If requested, flush out the alternate MDB */ + if (altflush) { + struct buf *alt_bp = NULL; + + if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_partition_avh_sector, sector_size, NOCRED, &alt_bp) == 0) { + bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize); + + (void) VNOP_BWRITE(alt_bp); + } else if (alt_bp) + buf_brelse(alt_bp); + } + + if (waitfor != MNT_WAIT) + buf_bawrite(bp); + else + retval = VNOP_BWRITE(bp); + + return (retval); +} +#endif + +/* + * Flush any dirty in-memory mount data to the on-disk + * volume header. + * + * Note: the on-disk volume signature is intentionally + * not flushed since the on-disk "H+" and "HX" signatures + * are always stored in-memory as "H+". + */ +int +hfs_flushvolumeheader(struct hfsmount *hfsmp, + hfs_flush_volume_header_options_t options) +{ + ExtendedVCB *vcb = HFSTOVCB(hfsmp); + struct filefork *fp; + HFSPlusVolumeHeader *volumeHeader, *altVH; + int retval; + struct buf *bp, *alt_bp; + int i; + daddr64_t priIDSector; + bool critical = false; + u_int16_t signature; + u_int16_t hfsversion; + daddr64_t avh_sector; + bool altflush = ISSET(options, HFS_FVH_WRITE_ALT); + + if (ISSET(options, HFS_FVH_FLUSH_IF_DIRTY) + && !hfs_header_needs_flushing(hfsmp)) { + return 0; + } + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return(0); + } +#if CONFIG_HFS_STD + if (hfsmp->hfs_flags & HFS_STANDARD) { + return hfs_flushMDB(hfsmp, ISSET(options, HFS_FVH_WAIT) ? 
MNT_WAIT : 0, altflush);
+ }
+#endif
+ priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
+
+ if (hfs_start_transaction(hfsmp) != 0) {
+ return EINVAL;
+ }
+
+ bp = NULL;
+ alt_bp = NULL;
+
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp);
+ if (retval) {
+ printf("hfs: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
+ goto err_exit;
+ }
+
+ volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
+ HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
+
+ /*
+ * Sanity check what we just read. If it's bad, try the alternate
+ * instead.
+ */
+ signature = SWAP_BE16 (volumeHeader->signature);
+ hfsversion = SWAP_BE16 (volumeHeader->version);
+ if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
+ (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
+ (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
+ printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d\n",
+ vcb->vcbVN, signature, hfsversion,
+ SWAP_BE32 (volumeHeader->blockSize));
+ hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
+
+ /* Almost always we read AVH relative to the partition size */
+ avh_sector = hfsmp->hfs_partition_avh_sector;
+
+ if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) {
+ /*
+ * The two altVH offsets do not match --- which means that a smaller file
+ * system exists in a larger partition. Verify that we have the correct
+ * alternate volume header sector as per the current partition size.
+ * The GPT device that we are mounted on top could have changed sizes
+ * without us knowing.
+ *
+ * We're in a transaction, so it's safe to modify the partition_avh_sector
+ * field if necessary.
+ */
+
+ uint64_t sector_count;
+
+ /* Get underlying device block count */
+ if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT,
+ (caddr_t)&sector_count, 0, vfs_context_current()))) {
+ printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN);
+ retval = ENXIO;
+ goto err_exit;
+ }
+
+ /* Partition size was changed without our knowledge */
+ if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) {
+ hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
+ /* Note: hfs_fs_avh_sector will remain unchanged */
+ printf ("hfs_flushVH: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n",
+ hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
+
+ /*
+ * We just updated the offset for AVH relative to
+ * the partition size, so the content of that AVH
+ * will be invalid. But since we are also maintaining
+ * a valid AVH relative to the file system size, we
+ * can read it since primary VH and partition AVH
+ * are not valid.
+ */
+ avh_sector = hfsmp->hfs_fs_avh_sector;
+ }
+ }
+
+ printf ("hfs: trying alternate (for %s) avh_sector=%qu\n",
+ (avh_sector == hfsmp->hfs_fs_avh_sector) ?
"file system" : "partition", avh_sector); + + if (avh_sector) { + retval = buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); + if (retval) { + printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); + goto err_exit; + } + + altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)); + signature = SWAP_BE16(altVH->signature); + hfsversion = SWAP_BE16(altVH->version); + + if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || + (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) || + (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) { + printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", + vcb->vcbVN, signature, hfsversion, + SWAP_BE32(altVH->blockSize)); + retval = EIO; + goto err_exit; + } + + /* The alternate is plausible, so use it. */ + bcopy(altVH, volumeHeader, kMDBSize); + buf_brelse(alt_bp); + alt_bp = NULL; + } else { + /* No alternate VH, nothing more we can do. */ + retval = EIO; + goto err_exit; + } + } + + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); + } + + /* + * For embedded HFS+ volumes, update create date if it changed + * (ie from a setattrlist call) + */ + if ((vcb->hfsPlusIOPosOffset != 0) && + (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) { + struct buf *bp2; + HFSMasterDirectoryBlock *mdb; + + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &bp2); + if (retval) { + if (bp2) + buf_brelse(bp2); + retval = 0; + } else { + mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) + + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); + + if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate ) + { + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp2); + } + + mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */ + + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL); + } else { + (void) VNOP_BWRITE(bp2); /* write out the changes */ + } + } + else + { + buf_brelse(bp2); /* just release it */ + } + } + } + + hfs_lock_mount (hfsmp); + + /* Note: only update the lower 16 bits worth of attributes */ + volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb); + volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock); + if (hfsmp->jnl) { + volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion); + } else { + volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion); + } + volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */ + volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod)); + volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp)); + volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt); + volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt); + volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks); + volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks + vcb->reclaimBlocks); + volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation); + volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz); + volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz); + volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID); + volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt); + volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap); + + if 
(bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) { + bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)); + critical = true; + } + + if (!altflush && !ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)) { + goto done; + } + + /* Sync Extents over-flow file meta data */ + fp = VTOF(vcb->extentsRefNum); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->extentsFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->extentsFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } + + /* Sync Catalog file meta data */ + fp = VTOF(vcb->catalogRefNum); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->catalogFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->catalogFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } + + /* Sync Allocation file meta data */ + fp = VTOF(vcb->allocationsRefNum); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->allocationFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->allocationFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } + + /* Sync Attribute file meta data */ + if (hfsmp->hfs_attribute_vp) { + fp = VTOF(hfsmp->hfs_attribute_vp); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->attributesFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->attributesFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + if (ISSET(FTOC(fp)->c_flag, C_MODIFIED)) { + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } + volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + } + + /* Sync Startup file meta data */ + if (hfsmp->hfs_startup_vp) { + fp = VTOF(hfsmp->hfs_startup_vp); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->startupFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->startupFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + altflush = true; + } + } + + if (altflush) + critical = true; + +done: 
+ MarkVCBClean(hfsmp);
+ hfs_unlock_mount (hfsmp);
+
+ /* If requested, flush out the alternate volume header */
+ if (altflush) {
+ /*
+ * The two altVH offsets do not match --- which means that a smaller file
+ * system exists in a larger partition. Verify that we have the correct
+ * alternate volume header sector as per the current partition size.
+ * The GPT device that we are mounted on top could have changed sizes
+ * without us knowing.
+ *
+ * We're in a transaction, so it's safe to modify the partition_avh_sector
+ * field if necessary.
+ */
+ if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) {
+ uint64_t sector_count;
+
+ /* Get underlying device block count */
+ if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT,
+ (caddr_t)&sector_count, 0, vfs_context_current()))) {
+ printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN);
+ retval = ENXIO;
+ goto err_exit;
+ }
+
+ /* Partition size was changed without our knowledge */
+ if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) {
+ hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
+ /* Note: hfs_fs_avh_sector will remain unchanged */
+ printf ("hfs_flushVH: altflush: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n",
+ hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
+ }
+ }
+
+ /*
+ * First see if we need to write I/O to the "secondary" AVH
+ * located at FS Size - 1024 bytes, because this one will
+ * always go into the journal. We put this AVH into the journal
+ * because even if the filesystem size has shrunk, this LBA should be
+ * reachable after the partition-size modification has occurred.
+ * The one where we need to be careful is partitionsize-1024, since the
+ * partition size should hopefully shrink.
+ *
+ * Most of the time this block will not execute.
+ */
+ if ((hfsmp->hfs_fs_avh_sector) &&
+ (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) {
+ if (buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
+ if (hfsmp->jnl) {
+ journal_modify_block_start(hfsmp->jnl, alt_bp);
+ }
+
+ bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
+ HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
+ kMDBSize);
+
+ if (hfsmp->jnl) {
+ journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
+ } else {
+ (void) VNOP_BWRITE(alt_bp);
+ }
+ } else if (alt_bp) {
+ buf_brelse(alt_bp);
+ }
+ }
+
+ /*
+ * Flush out alternate volume header located at 1024 bytes before
+ * end of the partition as part of journal transaction. In
+ * most cases, this will be the only alternate volume header
+ * that we need to worry about because the file system size is
+ * same as the partition size, therefore hfs_fs_avh_sector is
+ * same as hfs_partition_avh_sector. This is the "priority" AVH.
+ *
+ * However, do not always put this I/O into the journal. If we skipped the
+ * FS-Size AVH write above, then we will put this I/O into the journal as
+ * that indicates the two were in sync. However, if the FS size is
+ * not the same as the partition size, we are tracking two. We don't
+ * put it in the journal in that case, since if the partition
+ * size changes between uptimes, and we need to replay the journal,
+ * this I/O could generate an EIO if during replay it is now trying
+ * to access blocks beyond the device EOF.
+ */ + if (hfsmp->hfs_partition_avh_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { + + /* only one AVH, put this I/O in the journal. */ + if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) { + journal_modify_block_start(hfsmp->jnl, alt_bp); + } + + bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), + kMDBSize); + + /* If journaled and we only have one AVH to track */ + if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) { + journal_modify_block_end (hfsmp->jnl, alt_bp, NULL, NULL); + } else { + /* + * If we don't have a journal or there are two AVH's at the + * moment, then this one doesn't go in the journal. Note that + * this one may generate I/O errors, since the partition + * can be resized behind our backs at any moment and this I/O + * may now appear to be beyond the device EOF. + */ + (void) VNOP_BWRITE(alt_bp); + hfs_flush(hfsmp, HFS_FLUSH_CACHE); + } + } else if (alt_bp) { + buf_brelse(alt_bp); + } + } + } + + /* Finish modifying the block for the primary VH */ + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else { + if (!ISSET(options, HFS_FVH_WAIT)) { + buf_bawrite(bp); + } else { + retval = VNOP_BWRITE(bp); + /* When critical data changes, flush the device cache */ + if (critical && (retval == 0)) { + hfs_flush(hfsmp, HFS_FLUSH_CACHE); + } + } + } + hfs_end_transaction(hfsmp); + + return (retval); + +err_exit: + if (alt_bp) + buf_brelse(alt_bp); + if (bp) + buf_brelse(bp); + hfs_end_transaction(hfsmp); + return retval; +} + + +/* + * Creates a UUID from a unique "name" in the HFS UUID Name space. + * See version 3 UUID. + */ +void +hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result_uuid) +{ + + if (uuid_is_null(hfsmp->hfs_full_uuid)) { + uuid_t result; + + MD5_CTX md5c; + uint8_t rawUUID[8]; + + ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; + ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; + + MD5Init( &md5c ); + MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); + MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); + MD5Final( result, &md5c ); + + result[6] = 0x30 | ( result[6] & 0x0F ); + result[8] = 0x80 | ( result[8] & 0x3F ); + + uuid_copy(hfsmp->hfs_full_uuid, result); + } + uuid_copy (result_uuid, hfsmp->hfs_full_uuid); + +} + +/* + * Get file system attributes. 
+ */ +static int +hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) +{ +#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST | ATTR_FILE_CLUMPSIZE)) +#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_DATA_PROTECT_FLAGS)) + + ExtendedVCB *vcb = VFSTOVCB(mp); + struct hfsmount *hfsmp = VFSTOHFS(mp); + + int searchfs_on = 0; + int exchangedata_on = 1; + +#if CONFIG_SEARCHFS + searchfs_on = 1; +#endif + +#if CONFIG_PROTECT + if (cp_fs_protected(mp)) { + exchangedata_on = 0; + } +#endif + + VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); + VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF); + VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0)); + VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks); + VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0)); + VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1)); + VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize); + /* XXX needs clarification */ + VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); + VFSATTR_RETURN(fsap, f_files, (u_int64_t)HFS_MAX_FILES); + VFSATTR_RETURN(fsap, f_ffree, (u_int64_t)hfs_free_cnids(hfsmp)); + + fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; + fsap->f_fsid.val[1] = vfs_typenum(mp); + VFSATTR_SET_SUPPORTED(fsap, f_fsid); + + VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord); + VFSATTR_RETURN(fsap, f_carbon_fsid, 0); + + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + vol_capabilities_attr_t *cap; + + cap = &fsap->f_capabilities; + + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ & variants */ + cap->capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_ZERO_RUNS | + (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) | + (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_DECMPFS_COMPRESSION | +#endif +#if CONFIG_HFS_DIRLINK + VOL_CAP_FMT_DIR_HARDLINKS | +#endif +#ifdef VOL_CAP_FMT_DOCUMENT_ID + VOL_CAP_FMT_DOCUMENT_ID | +#endif /* VOL_CAP_FMT_DOCUMENT_ID */ +#ifdef VOL_CAP_FMT_WRITE_GENERATION_COUNT + VOL_CAP_FMT_WRITE_GENERATION_COUNT | +#endif /* VOL_CAP_FMT_WRITE_GENERATION_COUNT */ + VOL_CAP_FMT_PATH_FROM_ID; + } +#if CONFIG_HFS_STD + else { + /* HFS standard */ + cap->capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_HIDDEN_FILES | + VOL_CAP_FMT_PATH_FROM_ID; + } +#endif + + /* + * The capabilities word in 'cap' tell you whether or not + * this particular filesystem instance has feature X enabled. 
+ */ + + cap->capabilities[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK | +#if VOL_CAP_INT_RENAME_EXCL + VOL_CAP_INT_RENAME_EXCL | +#endif +#if NAMEDSTREAMS + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_NAMEDSTREAMS; +#else + VOL_CAP_INT_EXTENDED_ATTR; +#endif + + /* HFS may conditionally support searchfs and exchangedata depending on the runtime */ + + if (searchfs_on) { + cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_SEARCHFS; + } + if (exchangedata_on) { + cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXCHANGEDATA; + } + + cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0; + cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0; + + cap->valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_OPENDENYMODES | + VOL_CAP_FMT_HIDDEN_FILES | + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION | +#ifdef VOL_CAP_FMT_DOCUMENT_ID + VOL_CAP_FMT_DOCUMENT_ID | +#endif /* VOL_CAP_FMT_DOCUMENT_ID */ +#ifdef VOL_CAP_FMT_WRITE_GENERATION_COUNT + VOL_CAP_FMT_WRITE_GENERATION_COUNT | +#endif /* VOL_CAP_FMT_WRITE_GENERATION_COUNT */ + VOL_CAP_FMT_DIR_HARDLINKS; + + /* + * Bits in the "valid" field tell you whether or not the on-disk + * format supports feature X. + */ + + cap->valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK | + VOL_CAP_INT_MANLOCK | +#if VOL_CAP_INT_RENAME_EXCL + VOL_CAP_INT_RENAME_EXCL | +#endif + +#if NAMEDSTREAMS + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_NAMEDSTREAMS; +#else + VOL_CAP_INT_EXTENDED_ATTR; +#endif + + /* HFS always supports exchangedata and searchfs in the on-disk format natively */ + cap->valid[VOL_CAPABILITIES_INTERFACES] |= (VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_EXCHANGEDATA); + + + cap->valid[VOL_CAPABILITIES_RESERVED1] = 0; + cap->valid[VOL_CAPABILITIES_RESERVED2] = 0; + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + vol_attributes_attr_t *attrp = &fsap->f_attributes; + + attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK; +#if CONFIG_PROTECT + attrp->validattr.commonattr |= ATTR_CMN_DATA_PROTECT_FLAGS; +#endif // CONFIG_PROTECT + + attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; + attrp->validattr.dirattr = ATTR_DIR_VALIDMASK; + attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK; + attrp->validattr.forkattr = 0; + + attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK; +#if CONFIG_PROTECT + attrp->nativeattr.commonattr |= ATTR_CMN_DATA_PROTECT_FLAGS; +#endif // CONFIG_PROTECT + + attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; + attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK; + attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK; + attrp->nativeattr.forkattr = 0; + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + fsap->f_create_time.tv_sec = hfsmp->hfs_itime; + fsap->f_create_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_create_time); + 
fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod; + fsap->f_modify_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_modify_time); + // We really don't have volume access time, they should check the root node, fake it up + if (VFSATTR_IS_ACTIVE(fsap, f_access_time)) { + struct timeval tv; + + microtime(&tv); + fsap->f_access_time.tv_sec = tv.tv_sec; + fsap->f_access_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_access_time); + } + + fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp; + fsap->f_backup_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_backup_time); + + if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) { + u_int16_t subtype = 0; + + /* + * Subtypes (flavors) for HFS + * 0: Mac OS Extended + * 1: Mac OS Extended (Journaled) + * 2: Mac OS Extended (Case Sensitive) + * 3: Mac OS Extended (Case Sensitive, Journaled) + * 4 - 127: Reserved + * 128: Mac OS Standard + * + */ + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + if (hfsmp->jnl) { + subtype |= HFS_SUBTYPE_JOURNALED; + } + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) { + subtype |= HFS_SUBTYPE_CASESENSITIVE; + } + } +#if CONFIG_HFS_STD + else { + subtype = HFS_SUBTYPE_STANDARDHFS; + } +#endif + fsap->f_fssubtype = subtype; + VFSATTR_SET_SUPPORTED(fsap, f_fssubtype); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN); + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + } + if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) { + hfs_getvoluuid(hfsmp, fsap->f_uuid); + VFSATTR_SET_SUPPORTED(fsap, f_uuid); + } + return (0); +} + +/* + * Perform a volume rename. Requires the FS' root vp. + */ +static int +hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) +{ + ExtendedVCB *vcb = VTOVCB(vp); + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + struct cat_desc to_desc; + struct cat_desc todir_desc; + struct cat_desc new_desc; + cat_cookie_t cookie; + int lockflags; + int error = 0; + char converted_volname[256]; + size_t volname_length = 0; + size_t conv_volname_length = 0; + + + /* + * Ignore attempts to rename a volume to a zero-length name. + */ + if (name[0] == 0) + return(0); + + bzero(&to_desc, sizeof(to_desc)); + bzero(&todir_desc, sizeof(todir_desc)); + bzero(&new_desc, sizeof(new_desc)); + bzero(&cookie, sizeof(cookie)); + + todir_desc.cd_parentcnid = kHFSRootParentID; + todir_desc.cd_cnid = kHFSRootFolderID; + todir_desc.cd_flags = CD_ISDIR; + + to_desc.cd_nameptr = (const u_int8_t *)name; + to_desc.cd_namelen = strlen(name); + to_desc.cd_parentcnid = kHFSRootParentID; + to_desc.cd_cnid = cp->c_cnid; + to_desc.cd_flags = CD_ISDIR; + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) { + if ((error = hfs_start_transaction(hfsmp)) == 0) { + if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc); + + /* + * If successful, update the name in the VCB, ensure it's terminated. 
+				 */
+				if (error == 0) {
+					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
+
+					volname_length = strlen ((const char*)vcb->vcbVN);
+					/* Send the volume name down to CoreStorage if necessary */
+					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
+					if (error == 0) {
+						(void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+					}
+					error = 0;
+				}
+
+				hfs_systemfile_unlock(hfsmp, lockflags);
+				cat_postflight(hfsmp, &cookie, p);
+
+				if (error)
+					MarkVCBDirty(vcb);
+				(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
+			}
+			hfs_end_transaction(hfsmp);
+		}
+		if (!error) {
+			/* Release old allocated name buffer */
+			if (cp->c_desc.cd_flags & CD_HASBUF) {
+				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;
+
+				cp->c_desc.cd_nameptr = 0;
+				cp->c_desc.cd_namelen = 0;
+				cp->c_desc.cd_flags &= ~CD_HASBUF;
+				vfs_removename(tmp_name);
+			}
+			/* Update cnode's catalog descriptor */
+			replace_desc(cp, &new_desc);
+			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
+			cp->c_touch_chgtime = TRUE;
+		}
+
+		hfs_unlock(cp);
+	}
+
+	return(error);
+}
+
+/*
+ * Set file system attributes.
+ */
+static int
+hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, vfs_context_t context)
+{
+	kauth_cred_t cred = vfs_context_ucred(context);
+	int error = 0;
+
+	/*
+	 * Must be superuser or owner of filesystem to change volume attributes
+	 */
+	if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
+		return(EACCES);
+
+	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
+		vnode_t root_vp;
+
+		error = hfs_vfs_root(mp, &root_vp, context);
+		if (error)
+			goto out;
+
+		error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
+		(void) vnode_put(root_vp);
+		if (error)
+			goto out;
+
+		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
+	}
+
+out:
+	return error;
+}
+
+/* If runtime corruption is detected, set the volume inconsistent
+ * bit in the volume attributes.  The volume inconsistent bit is a persistent
+ * bit which indicates that the volume is corrupt and needs repair.
+ * It can be set by the kernel when it detects runtime corruption, or by
+ * file system repair utilities like fsck_hfs when a repair operation fails.
+ * The bit should be cleared only by a file system verify/repair utility
+ * like fsck_hfs when a verify/repair succeeds.
+ */ +void hfs_mark_inconsistent(struct hfsmount *hfsmp, + hfs_inconsistency_reason_t reason) +{ + hfs_lock_mount (hfsmp); + if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) { + hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; + MarkVCBDirty(hfsmp); + } + if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) { + switch (reason) { + case HFS_INCONSISTENCY_DETECTED: + printf("hfs_mark_inconsistent: Runtime corruption detected on %s, fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_ROLLBACK_FAILED: + printf("hfs_mark_inconsistent: Failed to roll back; volume `%s' might be inconsistent; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_OP_INCOMPLETE: + printf("hfs_mark_inconsistent: Failed to complete operation; volume `%s' might be inconsistent; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + case HFS_FSCK_FORCED: + printf("hfs_mark_inconsistent: fsck requested for `%s'; fsck will be forced on next mount.\n", + hfsmp->vcbVN); + break; + } + } + hfs_unlock_mount (hfsmp); +} + +/* Replay the journal on the device node provided. Returns zero if + * journal replay succeeded or no journal was supposed to be replayed. + */ +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context) +{ + int retval = 0; + int error = 0; + + /* Replay allowed only on raw devices */ + if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) + return EINVAL; + + retval = hfs_mountfs(devvp, NULL, NULL, /* journal_replay_only: */ 1, context); + buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay"); + + /* FSYNC the devnode to be sure all data has been flushed */ + error = VNOP_FSYNC(devvp, MNT_WAIT, context); + if (error) { + retval = error; + } + + return retval; +} + + +/* + * Cancel the syncer + */ +static void +hfs_syncer_free(struct hfsmount *hfsmp) +{ + if (hfsmp && ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)) { + hfs_syncer_lock(hfsmp); + CLR(hfsmp->hfs_flags, HFS_RUN_SYNCER); + hfs_syncer_unlock(hfsmp); + + // Wait for the syncer thread to finish + if (hfsmp->hfs_syncer_thread) { + hfs_syncer_wakeup(hfsmp); + hfs_syncer_lock(hfsmp); + while (hfsmp->hfs_syncer_thread) + hfs_syncer_wait(hfsmp, NULL); + hfs_syncer_unlock(hfsmp); + } + } +} + +static int hfs_vfs_ioctl(struct mount *mp, u_long command, caddr_t data, + __unused int flags, __unused vfs_context_t context) +{ + switch (command) { +#if CONFIG_PROTECT + case FIODEVICELOCKED: + cp_device_locked_callback(mp, (cp_lock_state_t)data); + return 0; +#endif + } + return ENOTTY; +} + +/* + * hfs vfs operations. + */ +const struct vfsops hfs_vfsops = { + .vfs_mount = hfs_mount, + .vfs_start = hfs_start, + .vfs_unmount = hfs_unmount, + .vfs_root = hfs_vfs_root, + .vfs_quotactl = hfs_quotactl, + .vfs_getattr = hfs_vfs_getattr, + .vfs_sync = hfs_sync, + .vfs_vget = hfs_vfs_vget, + .vfs_fhtovp = hfs_fhtovp, + .vfs_vptofh = hfs_vptofh, + .vfs_init = hfs_init, + .vfs_sysctl = hfs_sysctl, + .vfs_setattr = hfs_vfs_setattr, + .vfs_ioctl = hfs_vfs_ioctl, +}; diff --git a/core/hfs_vfsutils.c b/core/hfs_vfsutils.c new file mode 100644 index 0000000..fa2d856 --- /dev/null +++ b/core/hfs_vfsutils.c @@ -0,0 +1,4462 @@ +/* + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* @(#)hfs_vfsutils.c 4.0 +* +* (c) 1997-2002 Apple Inc. All Rights Reserved +* +* hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS. +* +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* for parsing boot-args */ +#include +#include +#include + +#include "hfs_iokit.h" +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_dbg.h" +#include "hfs_mount.h" +#include "hfs_endian.h" +#include "hfs_cnode.h" +#include "hfs_fsctl.h" +#include "hfs_cprotect.h" + +#include "FileMgrInternal.h" +#include "BTreesInternal.h" +#include "HFSUnicodeWrappers.h" + +/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */ +extern int hfs_resize_debug; + +static void ReleaseMetaFileVNode(struct vnode *vp); +static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); + +static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); +static void hfs_thaw_locked(struct hfsmount *hfsmp); + +#define HFS_MOUNT_DEBUG 1 + + +//******************************************************************************* +// Note: Finder information in the HFS/HFS+ metadata are considered opaque and +// hence are not in the right byte order on little endian machines. It is +// the responsibility of the finder and other clients to swap the data. 
+//******************************************************************************* + +//******************************************************************************* +// Routine: hfs_MountHFSVolume +// +// +//******************************************************************************* +unsigned char hfs_catname[] = "Catalog B-tree"; +unsigned char hfs_extname[] = "Extents B-tree"; +unsigned char hfs_vbmname[] = "Volume Bitmap"; +unsigned char hfs_attrname[] = "Attribute B-tree"; +unsigned char hfs_startupname[] = "Startup File"; + +#if CONFIG_HFS_STD +OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, + __unused struct proc *p) +{ + ExtendedVCB *vcb = HFSTOVCB(hfsmp); + int error; + ByteCount utf8chars; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork fork; + int newvnode_flags = 0; + + /* Block size must be a multiple of 512 */ + if (SWAP_BE32(mdb->drAlBlkSiz) == 0 || + (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0) + return (EINVAL); + + /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */ + if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && + ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) { + return (EINVAL); + } + hfsmp->hfs_flags |= HFS_STANDARD; + /* + * The MDB seems OK: transfer info from it into VCB + * Note - the VCB starts out clear (all zeros) + * + */ + vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord); + vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate))); + vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate); + vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod))); + vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb); + vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls); + vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt); + vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr); + vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks); + vcb->allocLimit = vcb->totalBlocks; + vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz); + vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz); + vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt); + vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID); + vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks); + vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp))); + vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt); + vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs); + vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt); + vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt); + bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo)); + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */ + + /* convert hfs encoded name into UTF-8 string */ + error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + /* + * When an HFS name cannot be encoded with the current + * volume encoding we use MacRoman as a fallback. + */ + if (error || (utf8chars == 0)) { + error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN); + /* If we fail to encode to UTF8 from Mac Roman, the name is bad. 
Deny the mount */ + if (error) { + goto MtVolErr; + } + } + + hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); + vcb->vcbVBMIOSize = kHFSBlockSize; + + /* Generate the partition-based AVH location */ + hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + hfsmp->hfs_logical_block_count); + + /* HFS standard is read-only, so just stuff the FS location in here, too */ + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; + + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; + bzero(&cnattr, sizeof(cnattr)); + cnattr.ca_linkcount = 1; + cnattr.ca_mode = S_IFREG; + bzero(&fork, sizeof(fork)); + + /* + * Set up Extents B-tree vnode + */ + cndesc.cd_nameptr = hfs_extname; + cndesc.cd_namelen = strlen((char *)hfs_extname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID; + fork.cf_size = SWAP_BE32(mdb->drXTFlSize); + fork.cf_blocks = fork.cf_size / vcb->blockSize; + fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz); + fork.cf_vblocks = 0; + fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock); + fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount); + fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock); + fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount); + fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock); + fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount); + cnattr.ca_blocks = fork.cf_blocks; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_extents_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error); + } + goto MtVolErr; + } + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), + (KeyCompareProcPtr)CompareExtentKeys)); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); + + /* + * Set up Catalog B-tree vnode... 
+ */ + cndesc.cd_nameptr = hfs_catname; + cndesc.cd_namelen = strlen((char *)hfs_catname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID; + fork.cf_size = SWAP_BE32(mdb->drCTFlSize); + fork.cf_blocks = fork.cf_size / vcb->blockSize; + fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz); + fork.cf_vblocks = 0; + fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock); + fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount); + fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock); + fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount); + fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock); + fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount); + cnattr.ca_blocks = fork.cf_blocks; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_catalog_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr)CompareCatalogKeys)); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp); + + /* + * Set up dummy Allocation file vnode (used only for locking bitmap) + */ + cndesc.cd_nameptr = hfs_vbmname; + cndesc.cd_namelen = strlen((char *)hfs_vbmname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID; + bzero(&fork, sizeof(fork)); + cnattr.ca_blocks = 0; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_allocation_vp, &newvnode_flags); + if (error) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error); + } + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp); + + /* mark the volume dirty (clear clean unmount bit) */ + vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; + + if (error == noErr) { + error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL); + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error); + } + } + + if (error == noErr) { + /* If the disk isn't write protected.. */ + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) { + MarkVCBDirty (vcb); // mark VCB dirty so it will be written + } + } + + /* + * all done with system files so we can unlock now... + */ + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + + if (error == noErr) { + /* If successful, then we can just return once we've unlocked the cnodes */ + return error; + } + + //-- Release any resources allocated so far before exiting with an error: +MtVolErr: + hfsUnmount(hfsmp, NULL); + + return (error); +} + +#endif + +//******************************************************************************* +// +// Sanity check Volume Header Block: +// Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has +// not been endian-swapped and represents the on-disk contents of this sector. +// This routine will not change the endianness of vhp block. 
+// +//******************************************************************************* +OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp) +{ + u_int16_t signature; + u_int16_t hfs_version; + u_int32_t blockSize; + + signature = SWAP_BE16(vhp->signature); + hfs_version = SWAP_BE16(vhp->version); + + if (signature == kHFSPlusSigWord) { + if (hfs_version != kHFSPlusVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version); + return (EINVAL); + } + } else if (signature == kHFSXSigWord) { + if (hfs_version != kHFSXVersion) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version); + return (EINVAL); + } + } else { + /* Removed printf for invalid HFS+ signature because it gives + * false error for UFS root volume + */ + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature); + } + return (EINVAL); + } + + /* Block size must be at least 512 and a power of 2 */ + blockSize = SWAP_BE32(vhp->blockSize); + if (blockSize < 512 || !powerof2(blockSize)) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize); + } + return (EINVAL); + } + + if (blockSize < hfsmp->hfs_logical_block_size) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", + blockSize, hfsmp->hfs_logical_block_size); + } + return (EINVAL); + } + return 0; +} + +//******************************************************************************* +// Routine: hfs_MountHFSPlusVolume +// +// +//******************************************************************************* + +OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, + off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred) +{ + register ExtendedVCB *vcb; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork cfork; + u_int32_t blockSize; + daddr64_t spare_sectors; + struct BTreeInfoRec btinfo; + u_int16_t signature; + u_int16_t hfs_version; + int newvnode_flags = 0; + int i; + OSErr retval; + char converted_volname[256]; + size_t volname_length = 0; + size_t conv_volname_length = 0; + bool async_bitmap_scan; + + signature = SWAP_BE16(vhp->signature); + hfs_version = SWAP_BE16(vhp->version); + + retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp); + if (retval) + return retval; + + if (signature == kHFSXSigWord) { + /* The in-memory signature is always 'H+'. */ + signature = kHFSPlusSigWord; + hfsmp->hfs_flags |= HFS_X; + } + + blockSize = SWAP_BE32(vhp->blockSize); + /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL && + (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n"); + } + return (EINVAL); + } + + /* Make sure we can live with the physical block size. */ + if ((disksize & (hfsmp->hfs_logical_block_size - 1)) || + (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n", + hfsmp->hfs_logical_block_size); + } + return (ENXIO); + } + + /* + * If allocation block size is less than the physical block size, + * same data could be cached in two places and leads to corruption. 
+ * + * HFS Plus reserves one allocation block for the Volume Header. + * If the physical size is larger, then when we read the volume header, + * we will also end up reading in the next allocation block(s). + * If those other allocation block(s) is/are modified, and then the volume + * header is modified, the write of the volume header's buffer will write + * out the old contents of the other allocation blocks. + * + * We assume that the physical block size is same as logical block size. + * The physical block size value is used to round down the offsets for + * reading and writing the primary and alternate volume headers. + * + * The same logic to ensure good hfs_physical_block_size is also in + * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and + * later are doing the I/Os using same block size. + */ + if (blockSize < hfsmp->hfs_physical_block_size) { + hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size; + hfsmp->hfs_log_per_phys = 1; + } + + /* + * The VolumeHeader seems OK: transfer info from it into VCB + * Note - the VCB starts out clear (all zeros) + */ + vcb = HFSTOVCB(hfsmp); + + vcb->vcbSigWord = signature; + vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock); + vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate)); + vcb->vcbAtrb = SWAP_BE32(vhp->attributes); + vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize); + vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID); + vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate)); + vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount); + vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount); + vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount); + + /* copy 32 bytes of Finder info */ + bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo)); + + vcb->vcbAlBlSt = 0; /* hfs+ allocation blocks start at first block of volume */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */ + + /* Now fill in the Extended VCB info */ + vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation); + vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks); + vcb->allocLimit = vcb->totalBlocks; + vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks); + vcb->blockSize = blockSize; + vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap); + vcb->localCreateDate = SWAP_BE32(vhp->createDate); + + vcb->hfsPlusIOPosOffset = embeddedOffset; + + /* Default to no free block reserve */ + vcb->reserveBlocks = 0; + + /* + * Update the logical block size in the mount struct + * (currently set up from the wrapper MDB) using the + * new blocksize value: + */ + hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size); + vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO); + + /* + * Validate and initialize the location of the alternate volume header. + * + * Note that there may be spare sectors beyond the end of the filesystem that still + * belong to our partition. + */ + + spare_sectors = hfsmp->hfs_logical_block_count - + (((daddr64_t)vcb->totalBlocks * blockSize) / + hfsmp->hfs_logical_block_size); + + /* + * Differentiate between "innocuous" spare sectors and the more unusual + * degenerate case: + * + * *** Innocuous spare sectors exist if: + * + * A) the number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and allocation block size) + * + * and + * + * B) the remainder is less than the size of a full allocation block's worth of bytes. 
+ * + * This handles the normal case where there may be a few extra sectors, but the two + * are fundamentally in sync. + * + * *** Degenerate spare sectors exist if: + * A) The number of bytes assigned to the partition (by multiplying logical + * block size * logical block count) is greater than the filesystem size + * (by multiplying allocation block count and block size). + * + * and + * + * B) the remainder is greater than a full allocation's block worth of bytes. + * In this case, a smaller file system exists in a larger partition. + * This can happen in various ways, including when volume is resized but the + * partition is yet to be resized. Under this condition, we have to assume that + * a partition management software may resize the partition to match + * the file system size in the future. Therefore we should update + * alternate volume header at two locations on the disk, + * a. 1024 bytes before end of the partition + * b. 1024 bytes before end of the file system + */ + + if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) { + /* + * Handle the degenerate case above. FS < partition size. + * AVH located at 1024 bytes from the end of the partition + */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + /* AVH located at 1024 bytes from the end of the filesystem */ + hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, + (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size)); + } + else { + /* Innocuous spare sectors; Partition & FS notion are in sync */ + hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count); + + hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector; + } + if (hfs_resize_debug) { + printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n", + hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector); + } + + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; + bzero(&cnattr, sizeof(cnattr)); + cnattr.ca_linkcount = 1; + cnattr.ca_mode = S_IFREG; + + /* + * Set up Extents B-tree vnode + */ + cndesc.cd_nameptr = hfs_extname; + cndesc.cd_namelen = strlen((char *)hfs_extname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID; + + cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize); + cfork.cf_new_size= 0; + cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->extentsFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->extentsFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_extents_vp, &newvnode_flags); + if (retval) + { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval); + } + goto ErrorExit; + } + + hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp); + + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), + (KeyCompareProcPtr) CompareExtentKeysPlus)); + + hfs_unlock(hfsmp->hfs_extents_cp); 
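/*
 * [Editor's aside — hypothetical worked example, not part of the original
 * change set; the numbers are made up purely for illustration.]
 * For the spare-sector logic above: suppose hfs_logical_block_size = 512,
 * blockSize = 4096, hfs_logical_block_count = 2,000,000 (a ~1 GB partition)
 * but totalBlocks = 200,000 (a ~819 MB filesystem).  Then
 *     spare_sectors = 2,000,000 - (200,000 * 4096) / 512 = 400,000
 * which exceeds blockSize / hfs_logical_block_size = 8, so this is the
 * degenerate case: one AVH copy is tracked 1024 bytes before the end of the
 * partition and a second copy 1024 bytes before the end of the filesystem.
 * Had the partition held, say, 1,600,004 sectors instead, spare_sectors
 * would be 4 (< 8) and the two AVH locations would coincide.
 */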
+ + if (retval) + { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval); + } + goto ErrorExit; + } + /* + * Set up Catalog B-tree vnode + */ + cndesc.cd_nameptr = hfs_catname; + cndesc.cd_namelen = strlen((char *)hfs_catname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID; + + cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->catalogFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->catalogFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_catalog_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp); + + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr) CompareExtendedCatalogKeys)); + + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval); + } + hfs_unlock(hfsmp->hfs_catalog_cp); + goto ErrorExit; + } + if ((hfsmp->hfs_flags & HFS_X) && + BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) { + if (btinfo.keyCompareType == kHFSBinaryCompare) { + hfsmp->hfs_flags |= HFS_CASE_SENSITIVE; + /* Install a case-sensitive key compare */ + (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), + (KeyCompareProcPtr)cat_binarykeycompare); + } + } + + hfs_unlock(hfsmp->hfs_catalog_cp); + + /* + * Set up Allocation file vnode + */ + cndesc.cd_nameptr = hfs_vbmname; + cndesc.cd_namelen = strlen((char *)hfs_vbmname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID; + + cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->allocationFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->allocationFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_allocation_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp); + hfs_unlock(hfsmp->hfs_allocation_cp); + + /* + * Set up Attribute B-tree vnode + */ + if (vhp->attributesFile.totalBlocks != 0) { + cndesc.cd_nameptr = hfs_attrname; + cndesc.cd_namelen = strlen((char *)hfs_attrname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID; + + cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->attributesFile.extents[i].startBlock); + 
cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_attribute_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp); + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp), + (KeyCompareProcPtr) hfs_attrkeycompare)); + hfs_unlock(hfsmp->hfs_attribute_cp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval); + } + goto ErrorExit; + } + + /* Initialize vnode for virtual attribute data file that spans the + * entire file system space for performing I/O to attribute btree + * We hold iocount on the attrdata vnode for the entire duration + * of mount (similar to btree vnodes) + */ + retval = init_attrdata_vnode(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval); + } + goto ErrorExit; + } + } + + /* + * Set up Startup file vnode + */ + if (vhp->startupFile.totalBlocks != 0) { + cndesc.cd_nameptr = hfs_startupname; + cndesc.cd_namelen = strlen((char *)hfs_startupname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID; + + cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->startupFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->startupFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_startup_vp, &newvnode_flags); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval); + } + goto ErrorExit; + } + hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp); + hfs_unlock(hfsmp->hfs_startup_cp); + } + + /* + * Pick up volume name and create date + * + * Acquiring the volume name should not manipulate the bitmap, only the catalog + * btree and possibly the extents overflow b-tree. + */ + retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval); + } + goto ErrorExit; + } + vcb->hfs_itime = cnattr.ca_itime; + vcb->volumeNameEncodingHint = cndesc.cd_encoding; + bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen)); + volname_length = strlen ((const char*)vcb->vcbVN); + cat_releasedesc(&cndesc); + + /* Send the volume name down to CoreStorage if necessary */ + retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED); + if (retval == 0) { + (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current()); + } + + /* reset retval == 0. we don't care about errors in volname conversion */ + retval = 0; + + /* + * pull in the volume UUID while we are still single-threaded. + * This brings the volume UUID into the cached one dangling off of the HFSMP + * Otherwise it would have to be computed on first access. 
+ */ + uuid_t throwaway; + hfs_getvoluuid (hfsmp, throwaway); + + /* + * We now always initiate a full bitmap scan even if the volume is read-only because this is + * our only shot to do I/Os of dramaticallly different sizes than what the buffer cache ordinarily + * expects. TRIMs will not be delivered to the underlying media if the volume is not + * read-write though. + */ + hfsmp->scan_var = 0; + + /* + * We have to ensure if we can proceed to scan the bitmap allocation + * file asynchronously. If the catalog file is fragmented such that it + * has overflow extents and the volume needs journal transaction we + * cannot scan the bitmap asynchronously. Doing so will cause the mount + * thread to block at journal transaction on bitmap lock, while scan + * thread which hold the bitmap lock exclusively performs disk I/O to + * issue TRIMS to unallocated ranges and build summary table. The + * amount of time the mount thread is blocked depends on the size of + * the volume, type of disk, etc. This blocking can cause the watchdog + * timer to timeout resulting in panic. Thus to ensure we don't timeout + * watchdog in such cases we scan the bitmap synchronously. + * + * Please NOTE: Currently this timeout only seem to happen for non SSD + * drives. Possibly reading a big fragmented allocation file to + * construct the summary table takes enough time to timeout watchdog. + * Thus we check if we need to scan the bitmap synchronously only if + * the disk is not SSD. + */ + async_bitmap_scan = true; + if (!ISSET(hfsmp->hfs_flags, HFS_SSD) && hfsmp->hfs_catalog_cp) { + bool catalog_has_overflow_extents; + bool journal_transaction_needed; + + catalog_has_overflow_extents = false; + if ((hfsmp->hfs_catalog_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) { + catalog_has_overflow_extents = true; + } + + journal_transaction_needed = false; + if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && + (hfsmp->hfs_flags & HFS_READ_ONLY))) { + journal_transaction_needed = true; + } + + if (catalog_has_overflow_extents && journal_transaction_needed) + async_bitmap_scan = false; + } + + if (async_bitmap_scan) { + thread_t allocator_scanner; + + /* Take the HFS mount mutex and wait on scan_var */ + hfs_lock_mount (hfsmp); + + + /* + * Scan the bitmap asynchronously. + */ + kernel_thread_start ((thread_continue_t)hfs_scan_blocks, hfsmp, + &allocator_scanner); + + /* + * Wait until it registers that it's got the appropriate locks + * (or that it is finished). + */ + while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT| + HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) { + msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, + "hfs_scan_blocks", 0); + } + + hfs_unlock_mount(hfsmp); + + thread_deallocate (allocator_scanner); + } else { + + /* + * Initialize the summary table and then scan the bitmap + * synchronously. Since we are scanning the bitmap + * synchronously we don't need to hold the bitmap lock. + */ + if (hfs_init_summary (hfsmp)) { + printf ("hfs: could not initialize summary table for " + "%s\n", hfsmp->vcbVN); + } + + (void)ScanUnmapBlocks (hfsmp); + + /* + * We need to set that the allocator scan is completed because + * hot file clustering waits for this condition later. 
+ */ + hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED; + buf_invalidateblks (hfsmp->hfs_allocation_vp, 0, 0, 0); + } + + /* mark the volume dirty (clear clean unmount bit) */ + vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; + if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { + hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT); + } + + /* kHFSHasFolderCount is only supported/updated on HFSX volumes */ + if ((hfsmp->hfs_flags & HFS_X) != 0) { + hfsmp->hfs_flags |= HFS_FOLDERCOUNT; + } + + // + // Check if we need to do late journal initialization. This only + // happens if a previous version of MacOS X (or 9) touched the disk. + // In that case hfs_late_journal_init() will go re-locate the journal + // and journal_info_block files and validate that they're still kosher. + // + if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask) + && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion) + && (hfsmp->jnl == NULL)) { + + retval = hfs_late_journal_init(hfsmp, vhp, args); + if (retval != 0) { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval); + } + retval = EINVAL; + goto ErrorExit; + } + + hfsmp->jnl = NULL; + + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) { + HFSPlusVolumeHeader *jvhp; + daddr64_t mdb_offset; + struct buf *bp = NULL; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize)); + + bp = NULL; + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval); + } + retval = EINVAL; + goto ErrorExit; + } else if (hfsmp->jnl) { + vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + } + } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) { + struct cat_attr jinfo_attr, jnl_attr; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; + } + + // if we're here we need to fill in the fileid's for the + // journal and journal_info_block. + hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL); + hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL); + if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) { + printf("hfs: danger! 
couldn't find the file-id's for the journal or journal_info_block\n"); + printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid); + } + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + vcb->vcbAtrb |= kHFSVolumeJournaledMask; + } + + if (hfsmp->jnl == NULL) { + vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); + } + } + + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected + { + MarkVCBDirty( vcb ); // mark VCB dirty so it will be written + } + + if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) { + hfs_pin_fs_metadata(hfsmp); + } + /* + * Distinguish 3 potential cases involving content protection: + * 1. mount point bit set; vcbAtrb does not support it. Fail. + * 2. mount point bit set; vcbattrb supports it. we're good. + * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good. + */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Does the mount point support it ? */ + if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) { + /* Case 1 above */ + retval = EINVAL; + goto ErrorExit; + } + } + else { + /* not requested in the mount point. Is it in FS? */ + if (vcb->vcbAtrb & kHFSContentProtectionMask) { + /* Case 3 above */ + vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT); + } + } + + /* At this point, if the mount point flag is set, we can enable it. */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Cases 2+3 above */ +#if CONFIG_PROTECT + /* Get the EAs as needed. */ + int cperr = 0; + struct cp_root_xattr *xattr = NULL; + xattr = hfs_malloc(sizeof(*xattr)); + + /* go get the EA to get the version information */ + cperr = cp_getrootxattr (hfsmp, xattr); + /* + * If there was no EA there, then write one out. + * Assuming EA is not present on the root means + * this is an erase install or a very old FS + */ + + if (cperr == 0) { + /* Have to run a valid CP version. */ + if (!cp_is_supported_version(xattr->major_version)) { + cperr = EINVAL; + } + } + else if (cperr == ENOATTR) { + printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS); + bzero(xattr, sizeof(struct cp_root_xattr)); + xattr->major_version = CP_CURRENT_VERS; + xattr->minor_version = CP_MINOR_VERS; + cperr = cp_setrootxattr (hfsmp, xattr); + } + + if (cperr) { + hfs_free(xattr, sizeof(*xattr)); + retval = EPERM; + goto ErrorExit; + } + + /* If we got here, then the CP version is valid. Set it in the mount point */ + hfsmp->hfs_running_cp_major_vers = xattr->major_version; + printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version); + hfsmp->cproot_flags = xattr->flags; + hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0; +#if HFS_CONFIG_KEY_ROLL + hfsmp->hfs_auto_roll_min_key_os_version = xattr->auto_roll_min_version; + hfsmp->hfs_auto_roll_max_key_os_version = xattr->auto_roll_max_version; +#endif + + hfs_free(xattr, sizeof(*xattr)); + + /* + * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree. + * Ensure that the boot-arg's value is valid for FILES (not directories), + * since only files are actually protected for now. 
+ */ + + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + } + +#if HFS_TMPDBG +#if !SECURE_KERNEL + PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose)); +#endif +#endif + + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + hfsmp->default_cp_class = PROTECTION_CLASS_C; + } + +#else + /* If CONFIG_PROTECT not built, ignore CP */ + vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT); +#endif + } + + /* + * Establish a metadata allocation zone. + */ + hfs_metadatazone_init(hfsmp, false); + + /* + * Make any metadata zone adjustments. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Keep the roving allocator out of the metadata zone. */ + if (vcb->nextAllocation >= hfsmp->hfs_metazone_start && + vcb->nextAllocation <= hfsmp->hfs_metazone_end) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } + } else { + if (vcb->nextAllocation <= 1) { + vcb->nextAllocation = hfsmp->hfs_min_alloc_start; + } + } + vcb->sparseAllocation = hfsmp->hfs_min_alloc_start; + + /* Setup private/hidden directories for hardlinks. */ + hfs_privatedir_init(hfsmp, FILE_HARDLINKS); + hfs_privatedir_init(hfsmp, DIR_HARDLINKS); + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + hfs_remove_orphans(hfsmp); + + /* See if we need to erase unused Catalog nodes due to . */ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + { + retval = hfs_erase_unused_nodes(hfsmp); + if (retval) { + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN); + } + + goto ErrorExit; + } + } + + /* + * Allow hot file clustering if conditions allow. + */ + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) && + ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) { + // + // Wait until the bitmap scan completes before we initializes the + // hotfile area so that we do not run into any issues with the + // bitmap being read while hotfiles is initializing itself. On + // some older/slower machines, without this interlock, the bitmap + // would sometimes get corrupted at boot time. + // + hfs_lock_mount(hfsmp); + while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) { + (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0); + } + hfs_unlock_mount(hfsmp); + + /* + * Note: at this point we are not allowed to fail the + * mount operation because the HotFile init code + * in hfs_recording_init() will lookup vnodes with + * VNOP_LOOKUP() which hangs vnodes off the mount + * (and if we were to fail, VFS is not prepared to + * clean that up at this point. Since HotFiles are + * optional, this is not a big deal. + */ + (void) hfs_recording_init(hfsmp); + } + + /* Force ACLs on HFS+ file systems. */ + vfs_setextendedsecurity(HFSTOVFS(hfsmp)); + + /* Enable extent-based extended attributes by default */ + hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; + + return (0); + +ErrorExit: + /* + * A fatal error occurred and the volume cannot be mounted, so + * release any resources that we acquired... 
+ */ + hfsUnmount(hfsmp, NULL); + + if (HFS_MOUNT_DEBUG) { + printf("hfs_mounthfsplus: encountered error (%d)\n", retval); + } + return (retval); +} + +static int +_pin_metafile(struct hfsmount *hfsmp, vnode_t vp) +{ + int err; + + err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (err == 0) { + err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL); + hfs_unlock(VTOC(vp)); + } + + return err; +} + +void +hfs_pin_fs_metadata(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb; + int err; + + vcb = HFSTOVCB(hfsmp); + + err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp); + if (err != 0) { + printf("hfs: failed to pin extents overflow file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp); + if (err != 0) { + printf("hfs: failed to pin catalog file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp); + if (err != 0) { + printf("hfs: failed to pin bitmap file %d\n", err); + } + err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp); + if (err != 0) { + printf("hfs: failed to pin extended attr file %d\n", err); + } + + hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1); + hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1); + + if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) { + // and hey, if we've got a journal, let's pin that too! + hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize)); + } +} + +/* + * ReleaseMetaFileVNode + * + * vp L - - + */ +static void ReleaseMetaFileVNode(struct vnode *vp) +{ + struct filefork *fp; + + if (vp && (fp = VTOF(vp))) { + if (fp->fcbBTCBPtr != NULL) { + (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + (void) BTClosePath(fp); + hfs_unlock(VTOC(vp)); + } + + /* release the node even if BTClosePath fails */ + vnode_recycle(vp); + vnode_put(vp); + } +} + + +/************************************************************* +* +* Unmounts a hfs volume. +* At this point vflush() has been called (to dump all non-metadata files) +* +*************************************************************/ + +int +hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) +{ + /* Get rid of our attribute data vnode (if any). This is done + * after the vflush() during mount, so we don't need to worry + * about any locks. + */ + if (hfsmp->hfs_attrdata_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp); + hfsmp->hfs_attrdata_vp = NULLVP; + } + + if (hfsmp->hfs_startup_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_startup_vp); + hfsmp->hfs_startup_cp = NULL; + hfsmp->hfs_startup_vp = NULL; + } + + if (hfsmp->hfs_attribute_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); + hfsmp->hfs_attribute_cp = NULL; + hfsmp->hfs_attribute_vp = NULL; + } + + if (hfsmp->hfs_catalog_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + hfsmp->hfs_catalog_cp = NULL; + hfsmp->hfs_catalog_vp = NULL; + } + + if (hfsmp->hfs_extents_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); + hfsmp->hfs_extents_cp = NULL; + hfsmp->hfs_extents_vp = NULL; + } + + if (hfsmp->hfs_allocation_vp) { + ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); + hfsmp->hfs_allocation_cp = NULL; + hfsmp->hfs_allocation_vp = NULL; + } + + return (0); +} + + +/* + * Test if fork has overflow extents. 
+ * + * Returns: + * non-zero - overflow extents exist + * zero - overflow extents do not exist + */ +bool overflow_extents(struct filefork *fp) +{ + u_int32_t blocks; + + // + // If the vnode pointer is NULL then we're being called + // from hfs_remove_orphans() with a faked-up filefork + // and therefore it has to be an HFS+ volume. Otherwise + // we check through the volume header to see what type + // of volume we're on. + // + +#if CONFIG_HFS_STD + if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) { + if (fp->ff_extents[2].blockCount == 0) + return false; + + blocks = fp->ff_extents[0].blockCount + + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount; + + return fp->ff_blocks > blocks; + } +#endif + + if (fp->ff_extents[7].blockCount == 0) + return false; + + blocks = fp->ff_extents[0].blockCount + + fp->ff_extents[1].blockCount + + fp->ff_extents[2].blockCount + + fp->ff_extents[3].blockCount + + fp->ff_extents[4].blockCount + + fp->ff_extents[5].blockCount + + fp->ff_extents[6].blockCount + + fp->ff_extents[7].blockCount; + + return fp->ff_blocks > blocks; +} + +static __attribute__((pure)) +boolean_t hfs_is_frozen(struct hfsmount *hfsmp) +{ + return (hfsmp->hfs_freeze_state == HFS_FROZEN + || (hfsmp->hfs_freeze_state == HFS_FREEZING + && current_thread() != hfsmp->hfs_freezing_thread)); +} + +/* + * Lock the HFS global journal lock + */ +int +hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) +{ + thread_t thread = current_thread(); + + if (hfsmp->hfs_global_lockowner == thread) { + panic ("hfs_lock_global: locking against myself!"); + } + + /* + * This check isn't really necessary but this stops us taking + * the mount lock in most cases. The essential check is below. + */ + if (hfs_is_frozen(hfsmp)) { + /* + * Unfortunately, there is no easy way of getting a notification + * for when a process is exiting and it's possible for the exiting + * process to get blocked somewhere else. To catch this, we + * periodically monitor the frozen process here and thaw if + * we spot that it's exiting. + */ +frozen: + hfs_lock_mount(hfsmp); + + struct timespec ts = { 0, 500 * NSEC_PER_MSEC }; + + while (hfs_is_frozen(hfsmp)) { + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && proc_exiting(hfsmp->hfs_freezing_proc)) { + hfs_thaw_locked(hfsmp); + break; + } + + msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT, "hfs_lock_global (frozen)", &ts); + } + hfs_unlock_mount(hfsmp); + } + + /* HFS_SHARED_LOCK */ + if (locktype == HFS_SHARED_LOCK) { + lck_rw_lock_shared (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER; + } + /* HFS_EXCLUSIVE_LOCK */ + else { + lck_rw_lock_exclusive (&hfsmp->hfs_global_lock); + hfsmp->hfs_global_lockowner = thread; + } + + /* + * We have to check if we're frozen again because of the time + * between when we checked and when we took the global lock. + */ + if (hfs_is_frozen(hfsmp)) { + hfs_unlock_global(hfsmp); + goto frozen; + } + + return 0; +} + + +/* + * Unlock the HFS global journal lock + */ +void +hfs_unlock_global (struct hfsmount *hfsmp) +{ + thread_t thread = current_thread(); + + /* HFS_LOCK_EXCLUSIVE */ + if (hfsmp->hfs_global_lockowner == thread) { + hfsmp->hfs_global_lockowner = NULL; + lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock); + } + /* HFS_LOCK_SHARED */ + else { + lck_rw_unlock_shared (&hfsmp->hfs_global_lock); + } +} + +/* + * Lock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! 
+ */ +inline +void hfs_lock_mount (struct hfsmount *hfsmp) { + lck_mtx_lock (&(hfsmp->hfs_mutex)); +} + +/* + * Unlock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_unlock_mount (struct hfsmount *hfsmp) { + lck_mtx_unlock (&(hfsmp->hfs_mutex)); +} + +/* + * Lock HFS system file(s). + * + * This function accepts a @flags parameter which indicates which + * system file locks are required. The value it returns should be + * used in a subsequent call to hfs_systemfile_unlock. The caller + * should treat this value as opaque; it may or may not have a + * relation to the @flags field that is passed in. The *only* + * guarantee that we make is that a value of zero means that no locks + * were taken and that there is no need to call hfs_systemfile_unlock + * (although it is harmless to do so). Recursion is supported but + * care must still be taken to ensure correct lock ordering. Note + * that requests for certain locks may cause other locks to also be + * taken, including locks that are not possible to ask for via the + * @flags parameter. + */ +int +hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) +{ + /* + * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file + */ + if (flags & SFL_CATALOG) { + if (hfsmp->hfs_catalog_cp + && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); + } + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)"); + } + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the catalog file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_catalog_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_CATALOG; + } + } + + if (flags & SFL_ATTRIBUTE) { + if (hfsmp->hfs_attribute_cp + && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the attribute file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. 
+ */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_attribute_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_ATTRIBUTE; + } + } + + if (flags & SFL_STARTUP) { + if (hfsmp->hfs_startup_cp + && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) { +#ifdef HFS_CHECK_LOCK_ORDER + if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { + panic("hfs_systemfile_lock: bad lock order (Extents before Startup)"); + } +#endif /* HFS_CHECK_LOCK_ORDER */ + + (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the startup file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_startup_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_STARTUP; + } + } + + /* + * To prevent locks being taken in the wrong order, the extent lock + * gets a bitmap lock as well. + */ + if (flags & (SFL_BITMAP | SFL_EXTENTS)) { + if (hfsmp->hfs_allocation_cp) { + (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + /* + * The bitmap lock is also grabbed when only extent lock + * was requested. Set the bitmap lock bit in the lock + * flags which callers will use during unlock. + */ + flags |= SFL_BITMAP; + } else { + flags &= ~SFL_BITMAP; + } + } + + if (flags & SFL_EXTENTS) { + /* + * Since the extents btree lock is recursive we always + * need exclusive access. + */ + if (hfsmp->hfs_extents_cp) { + (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if (vfs_isswapmount(hfsmp->hfs_mp)) { + /* + * because we may need this lock on the pageout path (if a swapfile allocation + * spills into the extents overflow tree), we will grant the holder of this + * lock the privilege of dipping into the reserve free pool in order to prevent + * a deadlock from occurring if we need those pageouts to complete before we + * will make any new pages available on the free list... the deadlock can occur + * if this thread needs to allocate memory while this lock is held + */ + if (set_vm_privilege(TRUE) == FALSE) { + /* + * indicate that we need to drop vm_privilege + * when we unlock + */ + flags |= SFL_VM_PRIV; + } + } + } else { + flags &= ~SFL_EXTENTS; + } + } + + return (flags); +} + +/* + * unlock HFS system file(s). 
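+ *
+ * The value returned by hfs_systemfile_lock() above is handed back here
+ * as @flags and treated as opaque.  A sketch of the usual pairing (the
+ * catalog lookup is only an example):
+ *
+ *	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+ *	error = cat_lookup(hfsmp, &desc, 0, 0, NULL, &attr, NULL, NULL);
+ *	hfs_systemfile_unlock(hfsmp, lockflags);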
+ */ +void +hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) +{ + if (!flags) + return; + + struct timeval tv; + u_int32_t lastfsync; + int numOfLockedBuffs; + + if (hfsmp->jnl == NULL) { + microuptime(&tv); + lastfsync = tv.tv_sec; + } + if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) { + hfs_unlock(hfsmp->hfs_startup_cp); + } + if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_attribute_cp); + } + if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_catalog_cp); + } + if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) { + hfs_unlock(hfsmp->hfs_allocation_cp); + } + if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(hfsmp->hfs_extents_cp); + + if (flags & SFL_VM_PRIV) { + /* + * revoke the vm_privilege we granted this thread + * now that we have unlocked the overflow extents + */ + set_vm_privilege(FALSE); + } + } +} + + +/* + * RequireFileLock + * + * Check to see if a vnode is locked in the current context + * This is to be used for debugging purposes only!! + */ +#if DEBUG +void RequireFileLock(FileReference vp, int shareable) +{ + int locked; + + /* The extents btree and allocation bitmap are always exclusive. */ + if (VTOC(vp)->c_fileid == kHFSExtentsFileID || + VTOC(vp)->c_fileid == kHFSAllocationFileID) { + shareable = 0; + } + + locked = VTOC(vp)->c_lockowner == current_thread(); + + if (!locked && !shareable) { + switch (VTOC(vp)->c_fileid) { + case kHFSExtentsFileID: + panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSCatalogFileID: + panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSAllocationFileID: + /* The allocation file can hide behind the jornal lock. */ + if (VTOHFS(vp)->jnl == NULL) + panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSStartupFileID: + panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp); + case kHFSAttributesFileID: + panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + } + } +} +#endif // DEBUG + + +/* + * There are three ways to qualify for ownership rights on an object: + * + * 1. (a) Your UID matches the cnode's UID. + * (b) The object in question is owned by "unknown" + * 2. (a) Permissions on the filesystem are being ignored and + * your UID matches the replacement UID. + * (b) Permissions on the filesystem are being ignored and + * the replacement UID is "unknown". + * 3. You are root. 
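+ *
+ * A call sketch (illustrative; the vfs_context accessors are an assumed
+ * caller environment, not part of this file):
+ *
+ *	if (hfs_owner_rights(hfsmp, cp->c_attr.ca_uid,
+ *	                     vfs_context_ucred(ctx), vfs_context_proc(ctx), 0))
+ *		return (EPERM);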
+ * + */ +int +hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, + __unused struct proc *p, int invokesuperuserstatus) +{ + if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */ + (cnode_uid == UNKNOWNUID) || /* [1b] */ + ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */ + ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */ + (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */ + (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */ + return (0); + } else { + return (EPERM); + } +} + + +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t baseMultiple) { + /* + Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the + specified limit but still an even multiple of the baseMultiple. + */ + int baseBlockCount, blockCount; + u_int32_t trialBlockSize; + + if (allocationBlockSize % baseMultiple != 0) { + /* + Whoops: the allocation blocks aren't even multiples of the specified base: + no amount of dividing them into even parts will be a multiple, either then! + */ + return 512; /* Hope for the best */ + }; + + /* Try the obvious winner first, to prevent 12K allocation blocks, for instance, + from being handled as two 6K logical blocks instead of 3 4K logical blocks. + Even though the former (the result of the loop below) is the larger allocation + block size, the latter is more efficient: */ + if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE; + + /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */ + baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */ + + for (blockCount = baseBlockCount; blockCount > 0; --blockCount) { + trialBlockSize = blockCount * baseMultiple; + if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */ + if ((trialBlockSize <= blockSizeLimit) && + (trialBlockSize % baseMultiple == 0)) { + return trialBlockSize; + }; + }; + }; + + /* Note: we should never get here, since blockCount = 1 should always work, + but this is nice and safe and makes the compiler happy, too ... */ + return 512; +} + + +u_int32_t +GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, + struct cat_attr *fattr, struct cat_fork *forkinfo) +{ + struct hfsmount * hfsmp; + struct cat_desc jdesc; + int lockflags; + int error; + + if (vcb->vcbSigWord != kHFSPlusSigWord) + return (0); + + hfsmp = VCBTOHFS(vcb); + + memset(&jdesc, 0, sizeof(struct cat_desc)); + jdesc.cd_parentcnid = kRootDirID; + jdesc.cd_nameptr = (const u_int8_t *)name; + jdesc.cd_namelen = strlen(name); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error == 0) { + return (fattr->ca_fileid); + } else if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (0); + } + + return (0); /* XXX what callers expect on an error */ +} + + +/* + * On HFS Plus Volumes, there can be orphaned files or directories + * These are files or directories that were unlinked while busy. + * If the volume was not cleanly unmounted then some of these may + * have persisted and need to be removed. 
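+ *
+ * Each orphan lives in the FILE_HARDLINKS private directory under a name
+ * of the form "temp<cnid>" with the cnid in decimal -- e.g. a busy file
+ * with cnid 1234 is left behind as "temp1234" -- which is exactly the
+ * name the scan below reconstructs and compares against.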
+ */ +void +hfs_remove_orphans(struct hfsmount * hfsmp) +{ + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile filerec; + struct HFSPlusCatalogKey * keyp; + struct proc *p = current_proc(); + FCB *fcb; + ExtendedVCB *vcb; + char filename[32]; + char tempname[32]; + size_t namelen; + cat_cookie_t cookie; + int catlock = 0; + int catreserve = 0; + bool started_tr = false; + int lockflags; + int result; + int orphaned_files = 0; + int orphaned_dirs = 0; + + bzero(&cookie, sizeof(cookie)); + + if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS) + return; + + vcb = HFSTOVCB(hfsmp); + fcb = VTOF(hfsmp->hfs_catalog_vp); + + btdata.bufferAddress = &filerec; + btdata.itemSize = sizeof(filerec); + btdata.itemCount = 1; + + iterator = hfs_mallocz(sizeof(*iterator)); + + /* Build a key to "temp" */ + keyp = (HFSPlusCatalogKey*)&iterator->key; + keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + keyp->nodeName.length = 4; /* "temp" */ + keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2; + keyp->nodeName.unicode[0] = 't'; + keyp->nodeName.unicode[1] = 'e'; + keyp->nodeName.unicode[2] = 'm'; + keyp->nodeName.unicode[3] = 'p'; + + /* + * Position the iterator just before the first real temp file/dir. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + hfs_systemfile_unlock(hfsmp, lockflags); + + /* Visit all the temp files/dirs in the HFS+ private directory. */ + for (;;) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (result) + break; + if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) + break; + + (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2, + (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0); + + (void) snprintf(tempname, sizeof(tempname), "%s%d", + HFS_DELETE_PREFIX, filerec.fileID); + + /* + * Delete all files (and directories) named "tempxxx", + * where xxx is the file's cnid in decimal. + * + */ + if (bcmp(tempname, filename, namelen + 1) != 0) + continue; + + struct filefork dfork; + struct filefork rfork; + struct cnode cnode; + int mode = 0; + + bzero(&dfork, sizeof(dfork)); + bzero(&rfork, sizeof(rfork)); + bzero(&cnode, sizeof(cnode)); + + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + + /* + * Reserve some space in the Catalog file. 
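+		 * (cat_preflight() reserves catalog b-tree space up front; it is
+		 * paired with the cat_postflight() call made once the record has
+		 * been deleted further down.)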
+ */ + if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { + printf("hfs_remove_orphans: cat_preflight failed\n"); + goto exit; + } + catreserve = 1; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + + /* Build a fake cnode */ + cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr, + &dfork.ff_data, &rfork.ff_data); + cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + cnode.c_desc.cd_nameptr = (const u_int8_t *)filename; + cnode.c_desc.cd_namelen = namelen; + cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid; + cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks; + + /* Position iterator at previous entry */ + if (BTIterateRecord(fcb, kBTreePrevRecord, iterator, + NULL, NULL) != 0) { + break; + } + + /* Truncate the file to zero (both forks) */ + if (dfork.ff_blocks > 0) { + u_int64_t fsize; + + dfork.ff_cp = &cnode; + cnode.c_datafork = &dfork; + cnode.c_rsrcfork = NULL; + fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize; + while (fsize > 0) { + if (fsize > HFS_BIGFILE_SIZE) { + fsize -= HFS_BIGFILE_SIZE; + } else { + fsize = 0; + } + + if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, + cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating data fork!\n"); + break; + } + + // + // if we're iteratively truncating this file down, + // then end the transaction and start a new one so + // that no one transaction gets too big. + // + if (fsize > 0) { + /* Drop system file locks before starting + * another transaction to preserve lock order. + */ + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_end_transaction(hfsmp); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = false; + goto exit; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } + } + } + + if (rfork.ff_blocks > 0) { + rfork.ff_cp = &cnode; + cnode.c_datafork = NULL; + cnode.c_rsrcfork = &rfork; + if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) { + printf("hfs: error truncating rsrc fork!\n"); + break; + } + } + + // Deal with extended attributes + if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) { + // hfs_removeallattr uses its own transactions + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = false; + hfs_end_transaction(hfsmp); + + hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr); + + if (!started_tr) { + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; + } + started_tr = true; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + catlock = 1; + } + + /* Remove the file or folder record from the Catalog */ + if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { + printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); + hfs_systemfile_unlock(hfsmp, lockflags); + catlock = 0; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); + break; + } + + mode = cnode.c_attr.ca_mode & S_IFMT; + + if (mode == S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } + + /* Update parent and volume counts */ + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; + if (mode == S_IFDIR) { + DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + + (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + 
&hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + + /* Drop locks and end the transaction */ + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); + catlock = catreserve = 0; + + /* + Now that Catalog is unlocked, update the volume info, making + sure to differentiate between files and directories + */ + if (mode == S_IFDIR) { + hfs_volupdate(hfsmp, VOL_RMDIR, 0); + } + else{ + hfs_volupdate(hfsmp, VOL_RMFILE, 0); + } + + hfs_end_transaction(hfsmp); + started_tr = false; + } /* end for */ + +exit: + + if (orphaned_files > 0 || orphaned_dirs > 0) + printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); + if (catlock) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (catreserve) { + cat_postflight(hfsmp, &cookie, p); + } + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + hfs_free(iterator, sizeof(*iterator)); + hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS; +} + + +/* + * This will return the correct logical block size for a given vnode. + * For most files, it is the allocation block size, for meta data like + * BTrees, this is kept as part of the BTree private nodeSize + */ +u_int32_t +GetLogicalBlockSize(struct vnode *vp) +{ +u_int32_t logBlockSize; + + hfs_assert(vp != NULL); + + /* start with default */ + logBlockSize = VTOHFS(vp)->hfs_logBlockSize; + + if (vnode_issystem(vp)) { + if (VTOF(vp)->fcbBTCBPtr != NULL) { + BTreeInfoRec bTreeInfo; + + /* + * We do not lock the BTrees, because if we are getting block..then the tree + * should be locked in the first place. + * We just want the nodeSize wich will NEVER change..so even if the world + * is changing..the nodeSize should remain the same. Which argues why lock + * it in the first place?? + */ + + (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo); + + logBlockSize = bTreeInfo.nodeSize; + + } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) { + logBlockSize = VTOVCB(vp)->vcbVBMIOSize; + } + } + + hfs_assert(logBlockSize > 0); + + return logBlockSize; +} + +#if HFS_SPARSE_DEV +static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks) +{ + struct vfsstatfs *vfsp; /* 272 bytes */ + uint64_t vfreeblks; + struct timeval now; + + hfs_lock_mount(hfsmp); + + vnode_t backing_vp = hfsmp->hfs_backingvp; + if (!backing_vp) { + hfs_unlock_mount(hfsmp); + return false; + } + + // usecount is not enough; we need iocount + if (vnode_get(backing_vp)) { + hfs_unlock_mount(hfsmp); + *pfree_blks = 0; + return true; + } + + uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; + uint32_t bandblks = hfsmp->hfs_sparsebandblks; + uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks; + + hfs_unlock_mount(hfsmp); + + mount_t backingfs_mp = vnode_mount(backing_vp); + + microtime(&now); + if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) { + vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT); + hfsmp->hfs_last_backingstatfs = now.tv_sec; + } + + if (!(vfsp = vfs_statfs(backingfs_mp))) { + vnode_put(backing_vp); + return false; + } + + vfreeblks = vfsp->f_bavail; + /* Normalize block count if needed. */ + if (vfsp->f_bsize != hfsmp->blockSize) + vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize; + if (vfreeblks > bandblks) + vfreeblks -= bandblks; + else + vfreeblks = 0; + + /* + * Take into account any delayed allocations. It is not + * certain what the original reason for the "2 *" is. 
Most + * likely it is to allow for additional requirements in the + * host file system and metadata required by disk images. The + * number of loaned blocks is likely to be small and we will + * stop using them as we get close to the limit. + */ + loanedblks = 2 * loanedblks; + if (vfreeblks > loanedblks) + vfreeblks -= loanedblks; + else + vfreeblks = 0; + + if (maxblks) + vfreeblks = MIN(vfreeblks, maxblks); + + vnode_put(backing_vp); + + *pfree_blks = vfreeblks; + + return true; +} +#endif + +u_int32_t +hfs_free_cnids(struct hfsmount * hfsmp) +{ + return HFS_MAX_FILES - hfsmp->hfs_filecount - hfsmp->hfs_dircount; +} + +u_int32_t +hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) +{ + u_int32_t freeblks; + u_int32_t rsrvblks; + u_int32_t loanblks; + + /* + * We don't bother taking the mount lock + * to look at these values since the values + * themselves are each updated atomically + * on aligned addresses. + */ + freeblks = hfsmp->freeBlocks; + rsrvblks = hfsmp->reserveBlocks; + loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks; + if (wantreserve) { + if (freeblks > rsrvblks) + freeblks -= rsrvblks; + else + freeblks = 0; + } + if (freeblks > loanblks) + freeblks -= loanblks; + else + freeblks = 0; + +#if HFS_SPARSE_DEV + /* + * When the underlying device is sparse, check the + * available space on the backing store volume. + */ + uint64_t vfreeblks; + if (hfs_get_backing_free_blks(hfsmp, &vfreeblks)) + freeblks = MIN(freeblks, vfreeblks); +#endif /* HFS_SPARSE_DEV */ + + return (freeblks); +} + +/* + * Map HFS Common errors (negative) to BSD error codes (positive). + * Positive errors (ie BSD errors) are passed through unchanged. + */ +short MacToVFSError(OSErr err) +{ + if (err >= 0) + return err; + + /* BSD/VFS internal errnos */ + switch (err) { + case HFS_ERESERVEDNAME: /* -8 */ + return err; + } + + switch (err) { + case dskFulErr: /* -34 */ + case btNoSpaceAvail: /* -32733 */ + return ENOSPC; + case fxOvFlErr: /* -32750 */ + return EOVERFLOW; + + case btBadNode: /* -32731 */ + return EIO; + + case memFullErr: /* -108 */ + return ENOMEM; /* +12 */ + + case cmExists: /* -32718 */ + case btExists: /* -32734 */ + return EEXIST; /* +17 */ + + case cmNotFound: /* -32719 */ + case btNotFound: /* -32735 */ + return ENOENT; /* 28 */ + + case cmNotEmpty: /* -32717 */ + return ENOTEMPTY; /* 66 */ + + case cmFThdDirErr: /* -32714 */ + return EISDIR; /* 21 */ + + case fxRangeErr: /* -32751 */ + return ERANGE; + + case bdNamErr: /* -37 */ + return ENAMETOOLONG; /* 63 */ + + case paramErr: /* -50 */ + case fileBoundsErr: /* -1309 */ + return EINVAL; /* +22 */ + + case fsBTBadNodeSize: + return ENXIO; + + default: + return EIO; /* +5 */ + } +} + + +/* + * Find the current thread's directory hint for a given index. + * + * Requires an exclusive lock on directory cnode. + * + * Use detach if the cnode lock must be dropped while the hint is still active. + */ +directoryhint_t * +hfs_getdirhint(struct cnode *dcp, int index, int detach) +{ + struct timeval tv; + directoryhint_t *hint; + boolean_t need_remove, need_init; + const u_int8_t * name; + + microuptime(&tv); + + /* + * Look for an existing hint first. If not found, create a new one (when + * the list is not full) or recycle the oldest hint. Since new hints are + * always added to the head of the list, the last hint is always the + * oldest. 
+ */ + TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) { + if (hint->dh_index == index) + break; + } + if (hint != NULL) { /* found an existing hint */ + need_init = false; + need_remove = true; + } else { /* cannot find an existing hint */ + need_init = true; + if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */ + /* Create a default directory hint */ + hint = hfs_zalloc(HFS_DIRHINT_ZONE); + ++dcp->c_dirhintcnt; + need_remove = false; + } else { /* recycle the last (i.e., the oldest) hint */ + hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); + if ((hint->dh_desc.cd_flags & CD_HASBUF) && + (name = hint->dh_desc.cd_nameptr)) { + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + need_remove = true; + } + } + + if (need_remove) + TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link); + + if (detach) + --dcp->c_dirhintcnt; + else + TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link); + + if (need_init) { + hint->dh_index = index; + hint->dh_desc.cd_flags = 0; + hint->dh_desc.cd_encoding = 0; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_parentcnid = dcp->c_fileid; + hint->dh_desc.cd_hint = dcp->c_childhint; + hint->dh_desc.cd_cnid = 0; + } + hint->dh_time = tv.tv_sec; + return (hint); +} + +/* + * Release a single directory hint. + * + * Requires an exclusive lock on directory cnode. + */ +void +hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint) +{ + const u_int8_t * name; + directoryhint_t *hint; + + /* Check if item is on list (could be detached) */ + TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) { + if (hint == relhint) { + TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link); + --dcp->c_dirhintcnt; + break; + } + } + name = relhint->dh_desc.cd_nameptr; + if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) { + relhint->dh_desc.cd_nameptr = NULL; + relhint->dh_desc.cd_namelen = 0; + relhint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + hfs_zfree(relhint, HFS_DIRHINT_ZONE); +} + +/* + * Release directory hints for given directory + * + * Requires an exclusive lock on directory cnode. + */ +void +hfs_reldirhints(struct cnode *dcp, int stale_hints_only) +{ + struct timeval tv; + directoryhint_t *hint, *prev; + const u_int8_t * name; + + if (stale_hints_only) + microuptime(&tv); + + /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */ + for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) { + if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL) + break; /* stop here if this entry is too new */ + name = hint->dh_desc.cd_nameptr; + if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) { + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */ + TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link); + hfs_zfree(hint, HFS_DIRHINT_ZONE); + --dcp->c_dirhintcnt; + } +} + +/* + * Insert a detached directory hint back into the list of dirhints. + * + * Requires an exclusive lock on directory cnode. 
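+ *
+ * A sketch of the detach/re-insert pattern this supports (illustrative):
+ *
+ *	hint = hfs_getdirhint(dcp, index, 1);		(1 == detach)
+ *	hfs_unlock(dcp);
+ *	... work that may block or drop the cnode lock ...
+ *	hfs_lock(dcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+ *	hfs_insertdirhint(dcp, hint);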
+ */ +void +hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint) +{ + directoryhint_t *test; + + TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) { + if (test == hint) + panic("hfs_insertdirhint: hint %p already on list!", hint); + } + + TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link); + ++dcp->c_dirhintcnt; +} + +/* + * Perform a case-insensitive compare of two UTF-8 filenames. + * + * Returns 0 if the strings match. + */ +int +hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2) +{ + u_int16_t *ustr1, *ustr2; + size_t ulen1, ulen2; + size_t maxbytes; + int cmp = -1; + + if (len1 != len2) + return (cmp); + + maxbytes = kHFSPlusMaxFileNameChars << 1; + ustr1 = hfs_malloc(maxbytes << 1); + ustr2 = ustr1 + (maxbytes >> 1); + + if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0) + goto out; + if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0) + goto out; + + cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1); +out: + hfs_free(ustr1, maxbytes << 1); + return (cmp); +} + +typedef struct jopen_cb_info { + mount_t mp; + off_t jsize; + char *desired_uuid; + struct vnode *jvp; + size_t blksize; + int need_clean; + int need_init; +} jopen_cb_info; + +static int +journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) +{ + jopen_cb_info *ji = (jopen_cb_info *)arg; + char bsd_name[256]; + int error; + + strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name)); + strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5); + + if ((error = vnode_lookup(bsd_name, VNODE_LOOKUP_NOFOLLOW, &ji->jvp, + vfs_context_kernel()))) { + printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str); + return 1; // keep iterating + } + + struct vnop_open_args oargs = { + .a_vp = ji->jvp, + .a_mode = FREAD | FWRITE, + .a_context = vfs_context_kernel(), + }; + + if (spec_open(&oargs)) { + vnode_put(ji->jvp); + ji->jvp = NULL; + return 1; + } + + // if the journal is dirty and we didn't specify a desired + // journal device uuid, then do not use the journal. but + // if the journal is just invalid (e.g. it hasn't been + // initialized) then just set the need_init flag. 
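+	// (journal_is_clean() is read here as returning 0 for a clean journal,
+	// EBUSY when it still holds transactions, and EINVAL when the area does
+	// not look like an initialized journal at all -- which is why EINVAL
+	// only sets need_init instead of rejecting the device.)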
+ if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') { + error = journal_is_clean(ji->jvp, 0, ji->jsize, + (void *)1, ji->blksize); + if (error == EBUSY) { + struct vnop_close_args cargs = { + .a_vp = ji->jvp, + .a_fflag = FREAD | FWRITE, + .a_context = vfs_context_kernel() + }; + spec_close(&cargs); + vnode_put(ji->jvp); + ji->jvp = NULL; + return 1; // keep iterating + } else if (error == EINVAL) { + ji->need_init = 1; + } + } + + if (ji->desired_uuid && ji->desired_uuid[0] == '\0') { + strlcpy(ji->desired_uuid, uuid_str, 128); + } + vnode_setmountedon(ji->jvp); + return 0; // stop iterating +} + +static vnode_t +open_journal_dev(mount_t mp, + const char *vol_device, + int need_clean, + char *uuid_str, + char *machine_serial_num, + off_t jsize, + size_t blksize, + int *need_init) +{ + int retry_counter=0; + jopen_cb_info ji; + + ji.mp = mp; + ji.jsize = jsize; + ji.desired_uuid = uuid_str; + ji.jvp = NULL; + ji.blksize = blksize; + ji.need_clean = need_clean; + ji.need_init = 0; + +// if (uuid_str[0] == '\0') { +// printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device); +// } else { +// printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str); +// } + while (ji.jvp == NULL && retry_counter++ < 4) { + if (retry_counter > 1) { + if (uuid_str[0]) { + printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str); + } else { + printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n"); + } + delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again + } + + hfs_iterate_media_with_content(EXTJNL_CONTENT_TYPE_UUID, + journal_open_cb, &ji); + } + + if (ji.jvp == NULL) { + printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n", + vol_device, uuid_str, machine_serial_num); + } + + *need_init = ji.need_init; + + return ji.jvp; +} + +void hfs_close_jvp(hfsmount_t *hfsmp) +{ + if (!hfsmp || !hfsmp->jvp || hfsmp->jvp == hfsmp->hfs_devvp) + return; + + vnode_clearmountedon(hfsmp->jvp); + struct vnop_close_args cargs = { + .a_vp = hfsmp->jvp, + .a_fflag = FREAD | FWRITE, + .a_context = vfs_context_kernel() + }; + spec_close(&cargs); + vnode_put(hfsmp->jvp); + hfsmp->jvp = NULL; +} + +int +hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, + void *_args, off_t embeddedOffset, daddr64_t mdb_offset, + HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred) +{ + JournalInfoBlock *jibp; + struct buf *jinfo_bp, *bp; + int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; + int retval, write_jibp = 0; + uint32_t blksize = hfsmp->hfs_logical_block_size; + struct vnode *devvp; + struct hfs_mount_args *args = _args; + u_int32_t jib_flags; + u_int64_t jib_offset; + u_int64_t jib_size; + const char *dev_name; + + devvp = hfsmp->hfs_devvp; + dev_name = vnode_getname_printable(devvp); + + if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { + arg_flags = args->journal_flags; + arg_tbufsz = args->journal_tbuffer_size; + } + + sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize; + + jinfo_bp = NULL; + retval = (int)buf_meta_bread(devvp, + (daddr64_t)((embeddedOffset/blksize) + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, cred, &jinfo_bp); + if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } + goto cleanup_dev_name; + } + + jibp = (JournalInfoBlock 
*)buf_dataptr(jinfo_bp); + jib_flags = SWAP_BE32(jibp->flags); + jib_size = SWAP_BE64(jibp->size); + + if (jib_flags & kJIJournalInFSMask) { + hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset = SWAP_BE64(jibp->offset); + } else { + int need_init=0; + + // if the volume was unmounted cleanly then we'll pick any + // available external journal partition + // + if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) { + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + } + + hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp, + dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + retval = EROFS; + goto cleanup_dev_name; + } else { + if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num)); + } + } + + jib_offset = 0; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } + } + + // save this off for the hack-y check in hfs_remove() + hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_size = jib_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) { + // if the file system is read-only, check if the journal is empty. + // if it is, then we can allow the mount. otherwise we have to + // return failure. + retval = journal_is_clean(hfsmp->jvp, + jib_offset + embeddedOffset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size); + + hfsmp->jnl = NULL; + + buf_brelse(jinfo_bp); + + if (retval) { + const char *name = vnode_getname_printable(devvp); + printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); + } + + goto cleanup_dev_name; + } + + if (jib_flags & kJIJournalNeedInitMask) { + printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", + jib_offset + embeddedOffset, jib_size); + hfsmp->jnl = journal_create(hfsmp->jvp, + jib_offset + embeddedOffset, + jib_size, + devvp, + blksize, + arg_flags, + arg_tbufsz, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + + // no need to start a transaction here... if this were to fail + // we'd just re-init it on the next mount. + jib_flags &= ~kJIJournalNeedInitMask; + jibp->flags = SWAP_BE32(jib_flags); + buf_bwrite(jinfo_bp); + jinfo_bp = NULL; + jibp = NULL; + } else { + //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", + // jib_offset + embeddedOffset, + // jib_size, SWAP_BE32(vhp->blockSize)); + + hfsmp->jnl = journal_open(hfsmp->jvp, + jib_offset + embeddedOffset, + jib_size, + devvp, + blksize, + arg_flags, + arg_tbufsz, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + + if (write_jibp) { + buf_bwrite(jinfo_bp); + } else { + buf_brelse(jinfo_bp); + } + jinfo_bp = NULL; + jibp = NULL; + + if (hfsmp->jnl && mdbp) { + // reload the mdb because it could have changed + // if the journal had to be replayed. 
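+			// (HFS_PRI_SECTOR() locates the volume header, which sits 1024
+			// bytes into the embedded volume; with 512-byte logical blocks,
+			// for example, that is logical block 2.)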
+ if (mdb_offset == 0) { + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + } + bp = NULL; + retval = (int)buf_meta_bread(devvp, + HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, cred, &bp); + if (retval) { + if (bp) { + buf_brelse(bp); + } + printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", + retval); + goto cleanup_dev_name; + } + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512); + buf_brelse(bp); + bp = NULL; + } + } + + // if we expected the journal to be there and we couldn't + // create it or open it then we have to bail out. + if (hfsmp->jnl == NULL) { + printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval); + retval = EINVAL; + goto cleanup_dev_name; + } + + retval = 0; + +cleanup_dev_name: + vnode_putname_printable(dev_name); + return retval; +} + + +// +// This function will go and re-locate the .journal_info_block and +// the .journal files in case they moved (which can happen if you +// run Norton SpeedDisk). If we fail to find either file we just +// disable journaling for this volume and return. We turn off the +// journaling bit in the vcb and assume it will get written to disk +// later (if it doesn't on the next mount we'd do the same thing +// again which is harmless). If we disable journaling we don't +// return an error so that the volume is still mountable. +// +// If the info we find for the .journal_info_block and .journal files +// isn't what we had stored, we re-set our cached info and proceed +// with opening the journal normally. +// +static int +hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args) +{ + JournalInfoBlock *jibp; + struct buf *jinfo_bp; + int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; + int retval, write_jibp = 0, recreate_journal = 0; + struct vnode *devvp; + struct cat_attr jib_attr, jattr; + struct cat_fork jib_fork, jfork; + ExtendedVCB *vcb; + u_int32_t fid; + struct hfs_mount_args *args = _args; + u_int32_t jib_flags; + u_int64_t jib_offset; + u_int64_t jib_size; + + devvp = hfsmp->hfs_devvp; + vcb = HFSTOVCB(hfsmp); + + if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { + if (args->journal_disable) { + return 0; + } + + arg_flags = args->journal_flags; + arg_tbufsz = args->journal_tbuffer_size; + } + + fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork); + if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) { + printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n", + fid ? jib_fork.cf_extents[0].startBlock : 0); + vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; + return 0; + } + hfsmp->hfs_jnlinfoblkid = fid; + + // make sure the journal_info_block begins where we think it should. + if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) { + printf("hfs: The journal_info_block moved (was: %d; is: %d). 
Fixing up\n", + SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock); + + vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock; + vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock); + recreate_journal = 1; + } + + + sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size; + jinfo_bp = NULL; + retval = (int)buf_meta_bread(devvp, + (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp); + if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } + printf("hfs: can't read journal info block. disabling journaling.\n"); + vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; + return 0; + } + + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); + jib_flags = SWAP_BE32(jibp->flags); + jib_offset = SWAP_BE64(jibp->offset); + jib_size = SWAP_BE64(jibp->size); + + fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork); + if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) { + printf("hfs: can't find the journal file! disabling journaling (start: %d)\n", + fid ? jfork.cf_extents[0].startBlock : 0); + buf_brelse(jinfo_bp); + vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; + return 0; + } + hfsmp->hfs_jnlfileid = fid; + + // make sure the journal file begins where we think it should. + if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { + printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n", + (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock); + + jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize; + write_jibp = 1; + recreate_journal = 1; + } + + // check the size of the journal file. + if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) { + printf("hfs: The journal file changed size! (was %lld; is %lld). 
Fixing up.\n", + jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize); + + jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize; + write_jibp = 1; + recreate_journal = 1; + } + + if (jib_flags & kJIJournalInFSMask) { + hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset += (off_t)vcb->hfsPlusIOPosOffset; + } else { + const char *dev_name; + int need_init = 0; + + dev_name = vnode_getname_printable(devvp); + + // since the journal is empty, just use any available external journal + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + + // this fills in the uuid of the device we actually get + hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp, + dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { + buf_brelse(jinfo_bp); + vnode_putname_printable(dev_name); + return EROFS; + } else { + if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num)); + } + } + jib_offset = 0; + recreate_journal = 1; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } + vnode_putname_printable(dev_name); + } + + // save this off for the hack-y check in hfs_remove() + hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize); + hfsmp->jnl_size = jib_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) { + // if the file system is read-only, check if the journal is empty. + // if it is, then we can allow the mount. otherwise we have to + // return failure. + retval = journal_is_clean(hfsmp->jvp, + jib_offset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size); + + hfsmp->jnl = NULL; + + buf_brelse(jinfo_bp); + + if (retval) { + const char *name = vnode_getname_printable(devvp); + printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); + } + + return retval; + } + + if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) { + printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", + jib_offset, jib_size); + hfsmp->jnl = journal_create(hfsmp->jvp, + jib_offset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size, + arg_flags, + arg_tbufsz, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + + // no need to start a transaction here... if this were to fail + // we'd just re-init it on the next mount. + jib_flags &= ~kJIJournalNeedInitMask; + write_jibp = 1; + + } else { + // + // if we weren't the last person to mount this volume + // then we need to throw away the journal because it + // is likely that someone else mucked with the disk. + // if the journal is empty this is no big deal. if the + // disk is dirty this prevents us from replaying the + // journal over top of changes that someone else made. 
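+		// (JOURNAL_RESET is taken here to mean "re-initialize rather than
+		// replay": journal_open() below discards whatever the journal
+		// area currently holds instead of applying it to the volume.)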
+ // + arg_flags |= JOURNAL_RESET; + + //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", + // jib_offset, + // jib_size, SWAP_BE32(vhp->blockSize)); + + hfsmp->jnl = journal_open(hfsmp->jvp, + jib_offset, + jib_size, + devvp, + hfsmp->hfs_logical_block_size, + arg_flags, + arg_tbufsz, + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); + if (hfsmp->jnl) + journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); + } + + + if (write_jibp) { + jibp->flags = SWAP_BE32(jib_flags); + jibp->offset = SWAP_BE64(jib_offset); + jibp->size = SWAP_BE64(jib_size); + + buf_bwrite(jinfo_bp); + } else { + buf_brelse(jinfo_bp); + } + jinfo_bp = NULL; + jibp = NULL; + + // if we expected the journal to be there and we couldn't + // create it or open it then we have to bail out. + if (hfsmp->jnl == NULL) { + printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval); + return EINVAL; + } + + return 0; +} + +/* + * Calculate the allocation zone for metadata. + * + * This zone includes the following: + * Allocation Bitmap file + * Overflow Extents file + * Journal file + * Quota files + * Clustered Hot files + * Catalog file + * + * METADATA ALLOCATION ZONE + * ____________________________________________________________________________ + * | | | | | | | + * | BM | JF | OEF | CATALOG |---> | HOT FILES | + * |____|____|_____|_______________|______________________________|___________| + * + * <------------------------------- N * 128 MB -------------------------------> + * + */ +#define GIGABYTE (u_int64_t)(1024*1024*1024) + +#define HOTBAND_MINIMUM_SIZE (10*1024*1024) +#define HOTBAND_MAXIMUM_SIZE (512*1024*1024) + +/* Initialize the metadata zone. + * + * If the size of the volume is less than the minimum size for + * metadata zone, metadata zone is disabled. + * + * If disable is true, disable metadata zone unconditionally. + */ +void +hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) +{ + ExtendedVCB *vcb; + u_int64_t fs_size; + u_int64_t zonesize; + u_int64_t temp; + u_int64_t filesize; + u_int32_t blk; + int items, really_do_it=1; + + vcb = HFSTOVCB(hfsmp); + fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit; + + /* + * For volumes less than 10 GB, don't bother. + */ + if (fs_size < ((u_int64_t)10 * GIGABYTE)) { + really_do_it = 0; + } + + /* + * Skip non-journaled volumes as well. + */ + if (hfsmp->jnl == NULL) { + really_do_it = 0; + } + + /* If caller wants to disable metadata zone, do it */ + if (disable == true) { + really_do_it = 0; + } + + /* + * Start with space for the boot blocks and Volume Header. + * 1536 = byte offset from start of volume to end of volume header: + * 1024 bytes is the offset from the start of the volume to the + * start of the volume header (defined by the volume format) + * + 512 bytes (the size of the volume header). + */ + zonesize = roundup(1536, hfsmp->blockSize); + + /* + * Add the on-disk size of allocation bitmap. + */ + zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize; + + /* + * Add space for the Journal Info Block and Journal (if they're in + * this file system). + */ + if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) { + zonesize += hfsmp->blockSize + hfsmp->jnl_size; + } + + /* + * Add the existing size of the Extents Overflow B-tree. + * (It rarely grows, so don't bother reserving additional room for it.) 
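+ *
+ * (The total computed here is later rounded up to a whole bitmap block's
+ * worth of allocation blocks; if vcbVBMIOSize is 4 KB and allocation
+ * blocks are 4 KB, that granule is 4096 * 8 * 4096 bytes = 128 MB, the
+ * "N * 128 MB" span in the diagram above.)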
+ */ + zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize); + + /* + * If there is an Attributes B-tree, leave room for 11 clumps worth. + * newfs_hfs allocates one clump, and leaves a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + if (hfsmp->hfs_attribute_cp) { + zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize; + } + + /* + * Leave room for 11 clumps of the Catalog B-tree. + * Again, newfs_hfs allocates one clump plus a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize; + + /* + * Add space for hot file region. + * + * ...for now, use 5 MB per 1 GB (0.5 %) + */ + filesize = (fs_size / 1024) * 5; + if (filesize > HOTBAND_MAXIMUM_SIZE) + filesize = HOTBAND_MAXIMUM_SIZE; + else if (filesize < HOTBAND_MINIMUM_SIZE) + filesize = HOTBAND_MINIMUM_SIZE; + /* + * Calculate user quota file requirements. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + items = QF_USERS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_USERS) + items = QF_MIN_USERS; + else if (items > QF_MAX_USERS) + items = QF_MAX_USERS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + /* + * Calculate group quota file requirements. + * + */ + items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_GROUPS) + items = QF_MIN_GROUPS; + else if (items > QF_MAX_GROUPS) + items = QF_MAX_GROUPS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } + } + filesize += (items + 1) * sizeof(struct dqblk); + } + zonesize += filesize; + + /* + * Round up entire zone to a bitmap block's worth. + * The extra space goes to the catalog file and hot file area. + */ + temp = zonesize; + zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize); + hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize; + /* + * If doing the round up for hfs_min_alloc_start would push us past + * allocLimit, then just reset it back to 0. Though using a value + * bigger than allocLimit would not cause damage in the block allocator + * code, this value could get stored in the volume header and make it out + * to disk, making the volume header technically corrupt. + */ + if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) { + hfsmp->hfs_min_alloc_start = 0; + } + + if (really_do_it == 0) { + /* If metadata zone needs to be disabled because the + * volume was truncated, clear the bit and zero out + * the values that are no longer needed. 
+ */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Disable metadata zone */ + hfsmp->hfs_flags &= ~HFS_METADATA_ZONE; + + /* Zero out mount point values that are not required */ + hfsmp->hfs_catalog_maxblks = 0; + hfsmp->hfs_hotfile_maxblks = 0; + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + hfsmp->hfs_metazone_start = 0; + hfsmp->hfs_metazone_end = 0; + } + + return; + } + + temp = zonesize - temp; /* temp has extra space */ + filesize += temp / 3; + hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize); + } else { + hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + } + + /* Convert to allocation blocks. */ + blk = zonesize / vcb->blockSize; + + /* The default metadata zone location is at the start of volume. */ + hfsmp->hfs_metazone_start = 1; + hfsmp->hfs_metazone_end = blk - 1; + + /* The default hotfile area is at the end of the zone. */ + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) { + hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); + hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; + hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + } + else { + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + } +#if DEBUG + printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks); +#endif + + hfsmp->hfs_flags |= HFS_METADATA_ZONE; +} + + +static u_int32_t +hfs_hotfile_freeblocks(struct hfsmount *hfsmp) +{ + ExtendedVCB *vcb = HFSTOVCB(hfsmp); + int lockflags; + int freeblocks; + + if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) { + // + // This is only used at initialization time and on an ssd + // we'll get the real info from the hotfile btree user + // info + // + return 0; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + freeblocks = MetaZoneFreeBlocks(vcb); + hfs_systemfile_unlock(hfsmp, lockflags); + + /* Minus Extents overflow file reserve. */ + if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; + } + + /* Minus catalog file reserve. */ + if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) { + freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; + } + + if (freeblocks < 0) + freeblocks = 0; + + // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks)); + return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks); +} + +/* + * Determine if a file is a "virtual" metadata file. + * This includes journal and quota files. 
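+ *
+ * (Only names directly under the root directory qualify: "/.journal" or
+ * "/.hotfiles.btree" return 1 here, while the same names nested deeper
+ * do not, because c_parentcnid must equal kHFSRootFolderID.)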
+ */ +int +hfs_virtualmetafile(struct cnode *cp) +{ + const char * filename; + + + if (cp->c_parentcnid != kHFSRootFolderID) + return (0); + + filename = (const char *)cp->c_desc.cd_nameptr; + if (filename == NULL) + return (0); + + if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) || + (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) || + (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) || + (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) || + (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0)) + return (1); + + return (0); +} + +void hfs_syncer_lock(struct hfsmount *hfsmp) +{ + hfs_lock_mount(hfsmp); +} + +void hfs_syncer_unlock(struct hfsmount *hfsmp) +{ + hfs_unlock_mount(hfsmp); +} + +void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts) +{ + msleep(&hfsmp->hfs_syncer_thread, &hfsmp->hfs_mutex, PWAIT, + "hfs_syncer_wait", ts); +} + +void hfs_syncer_wakeup(struct hfsmount *hfsmp) +{ + wakeup(&hfsmp->hfs_syncer_thread); +} + +uint64_t hfs_usecs_to_deadline(uint64_t usecs) +{ + uint64_t deadline; + clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline); + return deadline; +} + +// +// Fire off a timed callback to sync the disk if the +// volume is on ejectable media. +// +void hfs_sync_ejectable(struct hfsmount *hfsmp) +{ + // If we don't have a syncer or we get called by the syncer, just return + if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER) + || current_thread() == hfsmp->hfs_syncer_thread) { + return; + } + + hfs_syncer_lock(hfsmp); + + if (!timerisset(&hfsmp->hfs_sync_req_oldest)) + microuptime(&hfsmp->hfs_sync_req_oldest); + + /* If hfs_unmount is running, it will clear the HFS_RUN_SYNCER + flag. Also, we don't want to queue again if there is a sync + outstanding. */ + if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER) + || hfsmp->hfs_syncer_thread) { + hfs_syncer_unlock(hfsmp); + return; + } + + hfsmp->hfs_syncer_thread = (void *)1; + + hfs_syncer_unlock(hfsmp); + + kernel_thread_start(hfs_syncer, hfsmp, &hfsmp->hfs_syncer_thread); + thread_deallocate(hfsmp->hfs_syncer_thread); +} + +int +hfs_start_transaction(struct hfsmount *hfsmp) +{ + int ret = 0, unlock_on_err = 0; + thread_t thread = current_thread(); + +#ifdef HFS_CHECK_LOCK_ORDER + /* + * You cannot start a transaction while holding a system + * file lock. (unless the transaction is nested.) + */ + if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) { + if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (cat before jnl)\n"); + } + if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (attr before jnl)\n"); + } + if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) { + panic("hfs_start_transaction: bad lock order (ext before jnl)\n"); + } + } +#endif /* HFS_CHECK_LOCK_ORDER */ + +again: + + if (hfsmp->jnl) { + if (journal_owner(hfsmp->jnl) != thread) { + /* + * The global lock should be held shared if journal is + * active to prevent disabling. If we're not the owner + * of the journal lock, verify that we're not already + * holding the global lock exclusive before moving on. 
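+ *
+ * (For reference, the caller-side pairing this participates in is simply
+ *
+ *	if (hfs_start_transaction(hfsmp) == 0) {
+ *		... journaled metadata changes ...
+ *		hfs_end_transaction(hfsmp);
+ *	}
+ *
+ * as hfs_remove_orphans() does earlier in this file.)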
+ */ + if (hfsmp->hfs_global_lockowner == thread) { + ret = EBUSY; + goto out; + } + + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + // Things could have changed + if (!hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } + } else { + // No journal + if (hfsmp->hfs_global_lockowner != thread) { + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + + // Things could have changed + if (hfsmp->jnl) { + hfs_unlock_global(hfsmp); + goto again; + } + + OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + unlock_on_err = 1; + } + } + + /* If a downgrade to read-only mount is in progress, no other + * thread than the downgrade thread is allowed to modify + * the file system. + */ + if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) && + hfsmp->hfs_downgrading_thread != thread) { + ret = EROFS; + goto out; + } + + if (hfsmp->jnl) { + ret = journal_start_transaction(hfsmp->jnl); + } else { + ret = 0; + } + + if (ret == 0) + ++hfsmp->hfs_transaction_nesting; + +out: + if (ret != 0 && unlock_on_err) { + hfs_unlock_global (hfsmp); + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + } + + return ret; +} + +int +hfs_end_transaction(struct hfsmount *hfsmp) +{ + int ret; + + hfs_assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread()); + hfs_assert(hfsmp->hfs_transaction_nesting > 0); + + if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1) + hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY); + + bool need_unlock = !--hfsmp->hfs_transaction_nesting; + + if (hfsmp->jnl) { + ret = journal_end_transaction(hfsmp->jnl); + } else { + ret = 0; + } + + if (need_unlock) { + OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); + hfs_unlock_global (hfsmp); + hfs_sync_ejectable(hfsmp); + } + + return ret; +} + + +void +hfs_journal_lock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_lock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + +void +hfs_journal_unlock(struct hfsmount *hfsmp) +{ + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + if (hfsmp->jnl) { + journal_unlock(hfsmp->jnl); + } + hfs_unlock_global (hfsmp); +} + +/* + * Flush the contents of the journal to the disk. + * + * - HFS_FLUSH_JOURNAL + * Wait to write in-memory journal to the disk consistently. + * This means that the journal still contains uncommitted + * transactions and the file system metadata blocks in + * the journal transactions might be written asynchronously + * to the disk. But there is no guarantee that they are + * written to the disk before returning to the caller. + * Note that this option is sufficient for file system + * data integrity as it guarantees consistent journal + * content on the disk. + * + * - HFS_FLUSH_JOURNAL_META + * Wait to write in-memory journal to the disk + * consistently, and also wait to write all asynchronous + * metadata blocks to its corresponding locations + * consistently on the disk. This is overkill in normal + * scenarios but is useful whenever the metadata blocks + * are required to be consistent on-disk instead of + * just the journalbeing consistent; like before live + * verification and live volume resizing. The update of the + * metadata doesn't include a barrier of track cache flush. 
+ * + * - HFS_FLUSH_FULL + * HFS_FLUSH_JOURNAL + force a track cache flush to media + * + * - HFS_FLUSH_CACHE + * Force a track cache flush to media. + * + * - HFS_FLUSH_BARRIER + * Barrier-only flush to ensure write order + * + */ +errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode) +{ + errno_t error = 0; + int options = 0; + dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER }; + + switch (mode) { + case HFS_FLUSH_JOURNAL_META: + // wait for journal, metadata blocks and previous async flush to finish + SET(options, JOURNAL_WAIT_FOR_IO); + + // no break + + case HFS_FLUSH_JOURNAL: + case HFS_FLUSH_JOURNAL_BARRIER: + case HFS_FLUSH_FULL: + + if (mode == HFS_FLUSH_JOURNAL_BARRIER && + !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + mode = HFS_FLUSH_FULL; + + if (mode == HFS_FLUSH_FULL) + SET(options, JOURNAL_FLUSH_FULL); + + /* Only peek at hfsmp->jnl while holding the global lock */ + hfs_lock_global (hfsmp, HFS_SHARED_LOCK); + + if (hfsmp->jnl) + error = journal_flush(hfsmp->jnl, options); + + hfs_unlock_global (hfsmp); + + /* + * This may result in a double barrier as + * journal_flush may have issued a barrier itself + */ + if (mode == HFS_FLUSH_JOURNAL_BARRIER) + error = VNOP_IOCTL(hfsmp->hfs_devvp, + DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, NULL); + + break; + + case HFS_FLUSH_CACHE: + // Do a full sync + sync_req.options = 0; + + // no break + + case HFS_FLUSH_BARRIER: + // If barrier only flush doesn't support, fall back to use full flush. + if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER)) + sync_req.options = 0; + + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req, + FWRITE, NULL); + break; + + default: + error = EINVAL; + } + + return error; +} + +/* + * hfs_erase_unused_nodes + * + * Check wheter a volume may suffer from unused Catalog B-tree nodes that + * are not zeroed (due to ). If so, just write + * zeroes to the unused nodes. + * + * How do we detect when a volume needs this repair? We can't always be + * certain. If a volume was created after a certain date, then it may have + * been created with the faulty newfs_hfs. Since newfs_hfs only created one + * clump, we can assume that if a Catalog B-tree is larger than its clump size, + * that means that the entire first clump must have been written to, which means + * there shouldn't be unused and unwritten nodes in that first clump, and this + * repair is not needed. + * + * We have defined a bit in the Volume Header's attributes to indicate when the + * unused nodes have been repaired. A newer newfs_hfs will set this bit. + * As will fsck_hfs when it repairs the unused nodes. + */ +int hfs_erase_unused_nodes(struct hfsmount *hfsmp) +{ + int result; + struct filefork *catalog; + int lockflags; + + if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask) + { + /* This volume has already been checked and repaired. */ + return 0; + } + + if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate)) + { + /* This volume is too old to have had the problem. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + catalog = hfsmp->hfs_catalog_cp->c_datafork; + if (catalog->ff_size > catalog->ff_clumpsize) + { + /* The entire first clump must have been in use at some point. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + /* + * If we get here, we need to zero out those unused nodes. + * + * We start a transaction and lock the catalog since we're going to be + * making on-disk changes. 
But note that BTZeroUnusedNodes doens't actually + * do its writing via the journal, because that would be too much I/O + * to fit in a transaction, and it's a pain to break it up into multiple + * transactions. (It behaves more like growing a B-tree would.) + */ + printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN); + result = hfs_start_transaction(hfsmp); + if (result) + goto done; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + result = BTZeroUnusedNodes(catalog); + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes"); + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + if (result == 0) + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN); + +done: + return result; +} + + +int +check_for_dataless_file(struct vnode *vp, uint64_t op_type) +{ + int error; + + if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || decmpfs_cnode_cmp_type(VTOCMP(vp)) != DATALESS_CMPFS_TYPE) { + // there's nothing to do, it's not dataless + return 0; + } + + /* Swap files are special; ignore them */ + if (vnode_isswap(vp)) { + return 0; + } + + // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp); + error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT); + if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) { + error = 0; + } else if (error) { + if (error == EAGAIN) { + printf("hfs: dataless: timed out waiting for namespace handler...\n"); + // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)? + return 0; + } else if (error == EINTR) { + // printf("hfs: dataless: got a signal while waiting for namespace handler...\n"); + return EINTR; + } + } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + // + // if we're here, the dataless bit is still set on the file + // which means it didn't get handled. we return an error + // but it's presently ignored by all callers of this function. + // + // XXXdbg - EDATANOTPRESENT is what we really need... + // + return EBADF; + } + + return error; +} + + +// +// NOTE: this function takes care of starting a transaction and +// acquiring the systemfile lock so that it can call +// cat_update(). 
+// +// NOTE: do NOT hold and cnode locks while calling this function +// to avoid deadlocks (because we take a lock on the root +// cnode) +// +int +hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid) +{ + struct vnode *rvp; + struct cnode *cp; + int error; + + error = hfs_vfs_root(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel()); + if (error) { + return error; + } + + cp = VTOC(rvp); + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) { + return error; + } + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16)); + + int lockflags; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + return error; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + if (extinfo->document_id == 0) { + // initialize this to start at 3 (one greater than the root-dir id) + extinfo->document_id = 3; + } + + *docid = extinfo->document_id++; + + // mark the root cnode dirty + cp->c_flag |= C_MODIFIED; + hfs_update(cp->c_vp, 0); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + + (void) hfs_unlock(cp); + + vnode_put(rvp); + rvp = NULL; + + return 0; +} + + +/* + * Return information about number of file system allocation blocks + * taken by metadata on a volume. + * + * This function populates struct hfsinfo_metadata with allocation blocks + * used by extents overflow btree, catalog btree, bitmap, attribute btree, + * journal file, and sum of all of the above. + */ +int +hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo) +{ + int lockflags = 0; + int ret_lockflags = 0; + + /* Zero out the output buffer */ + bzero(hinfo, sizeof(struct hfsinfo_metadata)); + + /* + * Getting number of allocation blocks for all btrees + * should be a quick operation, so we grab locks for + * all of them at the same time + */ + lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE; + ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + /* + * Make sure that we were able to acquire all locks requested + * to protect us against conditions like unmount in progress. 
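[Editorial aside] The test that follows is a plain subset check on the lock bitmask; a worked example with hypothetical values showing the failure case it guards against:

    uint32_t requested = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
    uint32_t granted   = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP;  /* attribute lock not granted */
    /* (requested & granted) != requested, so the caller releases what it did get and returns EPERM. */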
+ */ + if ((lockflags & ret_lockflags) != lockflags) { + /* Release any locks that were acquired */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + return EPERM; + } + + /* Get information about all the btrees */ + hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks; + hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks; + hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks; + hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks; + + /* Done with btrees, give up the locks */ + hfs_systemfile_unlock(hfsmp, ret_lockflags); + + /* Get information about journal file */ + hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize); + + /* Calculate total number of metadata blocks */ + hinfo->total = hinfo->extents + hinfo->catalog + + hinfo->allocation + hinfo->attribute + + hinfo->journal; + + return 0; +} + +static int +hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) +{ + vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8"); + + return 0; +} + +int hfs_freeze(struct hfsmount *hfsmp) +{ + // First make sure some other process isn't freezing + hfs_lock_mount(hfsmp); + while (hfsmp->hfs_freeze_state != HFS_THAWED) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) { + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + + // Stop new syncers from starting + hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE; + + // Now wait for all syncers to finish + while (hfsmp->hfs_syncers) { + if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex, + PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) { + hfs_thaw_locked(hfsmp); + hfs_unlock_mount(hfsmp); + return EINTR; + } + } + hfs_unlock_mount(hfsmp); + + // flush things before we get started to try and prevent + // dirty data from being paged out while we're frozen. + // note: we can't do this once we're in the freezing state because + // other threads will need to take the global lock + vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL); + + // Block everything in hfs_lock_global now + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FREEZING; + hfsmp->hfs_freezing_thread = current_thread(); + hfs_unlock_mount(hfsmp); + + /* Take the exclusive lock to flush out anything else that + might have the global lock at the moment and also so we + can flush the journal. */ + hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK); + journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO); + hfs_unlock_global(hfsmp); + + // don't need to iterate on all vnodes, we just need to + // wait for writes to the system files and the device vnode + // + // Now that journal flush waits for all metadata blocks to + // be written out, waiting for btree writes is probably no + // longer required. 
+ if (HFSTOVCB(hfsmp)->extentsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3"); + if (HFSTOVCB(hfsmp)->catalogRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4"); + if (HFSTOVCB(hfsmp)->allocationsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6"); + vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7"); + + // We're done, mark frozen + hfs_lock_mount(hfsmp); + hfsmp->hfs_freeze_state = HFS_FROZEN; + hfsmp->hfs_freezing_proc = current_proc(); + hfs_unlock_mount(hfsmp); + + return 0; +} + +int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process) +{ + hfs_lock_mount(hfsmp); + + if (hfsmp->hfs_freeze_state != HFS_FROZEN) { + hfs_unlock_mount(hfsmp); + return EINVAL; + } + if (process && hfsmp->hfs_freezing_proc != process) { + hfs_unlock_mount(hfsmp); + return EPERM; + } + + hfs_thaw_locked(hfsmp); + + hfs_unlock_mount(hfsmp); + + return 0; +} + +static void hfs_thaw_locked(struct hfsmount *hfsmp) +{ + hfsmp->hfs_freezing_proc = NULL; + hfsmp->hfs_freeze_state = HFS_THAWED; + + wakeup(&hfsmp->hfs_freeze_state); +} + +uintptr_t obfuscate_addr(void *addr) +{ + vm_offset_t new_addr; + vm_kernel_addrperm_external((vm_offset_t)addr, &new_addr); + return new_addr; +} + +#if CONFIG_HFS_STD +/* + * Convert HFS encoded string into UTF-8 + * + * Unicode output is fully decomposed + * '/' chars are converted to ':' + */ +int +hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr) +{ + int error; + UniChar uniStr[MAX_HFS_UNICODE_CHARS]; + ItemCount uniCount; + size_t utf8len; + hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode; + u_int8_t pascal_length = 0; + + /* + * Validate the length of the Pascal-style string before passing it + * down to the decoding engine. 
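[Editorial aside] A Str31 is a Pascal-style string: one length byte followed by at most 31 data bytes. A small illustration, with a hypothetical value, of the layout the check below relies on:

    Str31 name = { 5, 'H', 'e', 'l', 'l', 'o' };  /* name[0] == 5 is the byte count; not NUL-terminated */
    /* A length byte greater than 31 can never describe a valid HFS name, hence the EINVAL below. */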
+ */ + pascal_length = *((const u_int8_t*)(hfs_str)); + if (pascal_length > 31) { + /* invalid string; longer than 31 bytes */ + error = EINVAL; + return error; + } + + error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount); + + if (uniCount == 0) + error = EINVAL; + + if (error == 0) { + error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0); + if (error == ENAMETOOLONG) + *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0); + else + *actualDstLen = utf8len; + } + + return error; +} + +/* + * Convert UTF-8 string into HFS encoding + * + * ':' chars are converted to '/' + * Assumes input represents fully decomposed Unicode + */ +int +utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/) +{ + int error; + UniChar uniStr[MAX_HFS_UNICODE_CHARS]; + size_t ucslen; + + error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0); + if (error == 0) + error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1); + + return error; +} + +/* + * Convert Unicode string into HFS encoding + * + * ':' chars are converted to '/' + * Assumes input represents fully decomposed Unicode + */ +int +unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry) +{ + int error; + unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname; + + error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr); + if (error && retry) { + error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr); + } + return error; +} + +#endif // CONFIG_HFS_STD + +static uint64_t hfs_allocated __attribute__((aligned(8))); + +#if HFS_MALLOC_DEBUG + +#warning HFS_MALLOC_DEBUG is on + +#include +#include "hfs_alloc_trace.h" + +struct alloc_debug_header { + uint32_t magic; + uint32_t size; + uint64_t sequence; + LIST_ENTRY(alloc_debug_header) chain; + void *backtrace[HFS_ALLOC_BACKTRACE_LEN]; +}; + +enum { + HFS_ALLOC_MAGIC = 0x68667361, // "hfsa" + HFS_ALLOC_DEAD = 0x68667364, // "hfsd" +}; + +static LIST_HEAD(, alloc_debug_header) hfs_alloc_list; +static lck_mtx_t *hfs_alloc_mtx; +static int hfs_alloc_tracing; +static uint64_t hfs_alloc_sequence; + +void hfs_alloc_trace_enable(void) +{ + if (hfs_alloc_tracing) + return; + + // Not thread-safe, but this is debug so who cares + extern lck_grp_t *hfs_mutex_group; + extern lck_attr_t *hfs_lock_attr; + + if (!hfs_alloc_mtx) { + hfs_alloc_mtx = lck_mtx_alloc_init(hfs_mutex_group, hfs_lock_attr); + LIST_INIT(&hfs_alloc_list); + } + + // Using OSCompareAndSwap in lieu of a barrier + OSCompareAndSwap(hfs_alloc_tracing, true, &hfs_alloc_tracing); +} + +void hfs_alloc_trace_disable(void) +{ + if (!hfs_alloc_tracing) + return; + + hfs_alloc_tracing = false; + + lck_mtx_lock_spin(hfs_alloc_mtx); + + struct alloc_debug_header *hdr; + LIST_FOREACH(hdr, &hfs_alloc_list, chain) { + hdr->chain.le_prev = NULL; + } + LIST_INIT(&hfs_alloc_list); + + lck_mtx_unlock(hfs_alloc_mtx); +} + +static int hfs_handle_alloc_tracing SYSCTL_HANDLER_ARGS +{ + int v = hfs_alloc_tracing; + + int err = sysctl_handle_int(oidp, &v, 0, req); + if (err || req->newptr == USER_ADDR_NULL || v == hfs_alloc_tracing) + return err; + + if (v) + hfs_alloc_trace_enable(); + else + hfs_alloc_trace_disable(); + + return 0; +} + +HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_tracing, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0, + hfs_handle_alloc_tracing, "I", "Allocation tracing") + +static int hfs_handle_alloc_trace_info 
SYSCTL_HANDLER_ARGS +{ + if (!hfs_alloc_tracing) { + struct hfs_alloc_trace_info info = {}; + return sysctl_handle_opaque(oidp, &info, sizeof(info), req); + } + + const int size = 128 * 1024; + struct hfs_alloc_trace_info *info = kalloc(size); + + const int max_entries = ((size - sizeof(*info)) + / sizeof(struct hfs_alloc_info_entry)); + + info->entry_count = 0; + info->more = false; + + lck_mtx_lock_spin(hfs_alloc_mtx); + + struct alloc_debug_header *hdr; + LIST_FOREACH(hdr, &hfs_alloc_list, chain) { + if (info->entry_count == max_entries) { + info->more = true; + break; + } + vm_offset_t o; + vm_kernel_addrperm_external((vm_offset_t)hdr, &o); + info->entries[info->entry_count].ptr = o; + info->entries[info->entry_count].size = hdr->size; + info->entries[info->entry_count].sequence = hdr->sequence; + for (int i = 0; i < HFS_ALLOC_BACKTRACE_LEN; ++i) { + vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[i], &o); + info->entries[info->entry_count].backtrace[i] = o; + } + ++info->entry_count; + } + + lck_mtx_unlock(hfs_alloc_mtx); + + int err = sysctl_handle_opaque(oidp, info, + sizeof(*info) + info->entry_count + * sizeof(struct hfs_alloc_info_entry), + req); + + kfree(info, size); + + return err; +} + +HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_trace_info, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0, + hfs_handle_alloc_trace_info, "-", "Allocation trace info") + +bool hfs_dump_allocations(void) +{ + if (!hfs_allocated) + return false; + + lck_mtx_lock(hfs_alloc_mtx); + + struct alloc_debug_header *hdr; + LIST_FOREACH(hdr, &hfs_alloc_list, chain) { + vm_offset_t o; + vm_kernel_addrperm_external((vm_offset_t)hdr, &o); + printf(" -- 0x%lx:%llu <%u> --\n", o, hdr->sequence, hdr->size); + for (int j = 0; j < HFS_ALLOC_BACKTRACE_LEN && hdr->backtrace[j]; ++j) { + vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[j], &o); + printf("0x%lx\n", o); + } + } + + lck_mtx_unlock(hfs_alloc_mtx); + + return true; +} + +#endif + +HFS_SYSCTL(QUAD, _vfs_generic_hfs, OID_AUTO, allocated, + CTLFLAG_RD | CTLFLAG_LOCKED, &hfs_allocated, "Memory allocated") + +void *hfs_malloc(size_t size) +{ +#if HFS_MALLOC_DEBUG + hfs_assert(size <= 0xffffffff); + + struct alloc_debug_header *hdr; + + void *ptr; + ptr = kalloc(size + sizeof(*hdr)); + + hdr = ptr + size; + + hdr->magic = HFS_ALLOC_MAGIC; + hdr->size = size; + + if (hfs_alloc_tracing) { + OSBacktrace(hdr->backtrace, HFS_ALLOC_BACKTRACE_LEN); + lck_mtx_lock_spin(hfs_alloc_mtx); + LIST_INSERT_HEAD(&hfs_alloc_list, hdr, chain); + hdr->sequence = ++hfs_alloc_sequence; + lck_mtx_unlock(hfs_alloc_mtx); + } else + hdr->chain.le_prev = NULL; +#else + void *ptr; + ptr = kalloc(size); +#endif + + OSAddAtomic64(size, &hfs_allocated); + + return ptr; +} + +void hfs_free(void *ptr, size_t size) +{ + if (!ptr) + return; + + OSAddAtomic64(-(int64_t)size, &hfs_allocated); + +#if HFS_MALLOC_DEBUG + struct alloc_debug_header *hdr = ptr + size; + + hfs_assert(hdr->magic == HFS_ALLOC_MAGIC); + hfs_assert(hdr->size == size); + + hdr->magic = HFS_ALLOC_DEAD; + + if (hdr->chain.le_prev) { + lck_mtx_lock_spin(hfs_alloc_mtx); + LIST_REMOVE(hdr, chain); + lck_mtx_unlock(hfs_alloc_mtx); + } + + kfree(ptr, size + sizeof(*hdr)); +#else + kfree(ptr, size); +#endif +} + +void *hfs_mallocz(size_t size) +{ + void *ptr = hfs_malloc(size); + bzero(ptr, size); + return ptr; +} + +// -- Zone allocator-related structures and routines -- + +hfs_zone_entry_t hfs_zone_entries[HFS_NUM_ZONES] = { + { HFS_CNODE_ZONE, sizeof(struct cnode), "HFS node", true }, + 
{ HFS_FILEFORK_ZONE, sizeof(struct filefork), "HFS fork", true }, + { HFS_DIRHINT_ZONE, sizeof(struct directoryhint), "HFS dirhint", true } +}; + +hfs_zone_t hfs_zones[HFS_NUM_ZONES]; + +void hfs_init_zones(void) { + for (int i = 0; i < HFS_NUM_ZONES; i++) { + hfs_zones[i].hz_zone = zinit(hfs_zone_entries[i].hze_elem_size, 1024 * 1024, PAGE_SIZE, hfs_zone_entries[i].hze_name); + hfs_zones[i].hz_elem_size = hfs_zone_entries[i].hze_elem_size; + + zone_change(hfs_zones[i].hz_zone, Z_CALLERACCT, false); + if (hfs_zone_entries[i].hze_noencrypt) + zone_change(hfs_zones[i].hz_zone, Z_NOENCRYPT, true); + } +} + +void *hfs_zalloc(hfs_zone_kind_t zone) +{ + OSAddAtomic64(hfs_zones[zone].hz_elem_size, &hfs_allocated); + + return zalloc(hfs_zones[zone].hz_zone); +} + +void hfs_zfree(void *ptr, hfs_zone_kind_t zone) +{ + OSAddAtomic64(-(int64_t)hfs_zones[zone].hz_elem_size, &hfs_allocated); + + zfree(hfs_zones[zone].hz_zone, ptr); +} + +struct hfs_sysctl_chain *sysctl_list; + +void hfs_sysctl_register(void) +{ + struct hfs_sysctl_chain *e = sysctl_list; + while (e) { + sysctl_register_oid(e->oid); + e = e->next; + } +} + +void hfs_sysctl_unregister(void) +{ + struct hfs_sysctl_chain *e = sysctl_list; + while (e) { + sysctl_unregister_oid(e->oid); + e = e->next; + } +} + +void hfs_assert_fail(const char *file, unsigned line, const char *expr) +{ + Assert(file, line, expr); + __builtin_unreachable(); +} diff --git a/core/hfs_vnops.c b/core/hfs_vnops.c new file mode 100644 index 0000000..db88785 --- /dev/null +++ b/core/hfs_vnops.c @@ -0,0 +1,7622 @@ +/* + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "hfs.h" +#include "hfs_catalog.h" +#include "hfs_cnode.h" +#include "hfs_dbg.h" +#include "hfs_mount.h" +#include "hfs_quota.h" +#include "hfs_endian.h" +#include "hfs_kdebug.h" +#include "hfs_cprotect.h" + +#if HFS_CONFIG_KEY_ROLL +#include "hfs_key_roll.h" +#endif + +#include "BTreesInternal.h" +#include "FileMgrInternal.h" + +/* Global vfs data structures for hfs */ + +/* + * Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is + * 'no'). At some point this might need to move into VFS and we might + * need to provide an API to get at it, but for now, this is only used + * by HFS+. + */ +int always_do_fullfsync = 0; +SYSCTL_DECL(_vfs_generic); +HFS_SYSCTL(INT, _vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW | CTLFLAG_LOCKED, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called") + +int hfs_makenode(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, struct vnode_attr *vap, + vfs_context_t ctx); +int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p); +int hfs_metasync_all(struct hfsmount *hfsmp); + +int hfs_removedir(struct vnode *, struct vnode *, struct componentname *, + int, int); +int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, + int, int, int, struct vnode *, int); + +/* Used here and in cnode teardown -- for symlinks */ +int hfs_removefile_callback(struct buf *bp, void *hfsmp); + +enum { + HFS_MOVE_DATA_INCLUDE_RSRC = 1, +}; +typedef uint32_t hfs_move_data_options_t; + +static int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, + hfs_move_data_options_t options); +static int hfs_move_fork(filefork_t *srcfork, cnode_t *src, + filefork_t *dstfork, cnode_t *dst); + + +static int hfs_exchangedata_getxattr (struct vnode *vp, uint32_t name_selector, void **buffer, size_t *xattr_size); +static int hfs_exchangedata_setxattr (struct hfsmount *hfsmp, uint32_t fileid, + uint32_t name_selector, void *buffer, size_t xattr_size); + +enum XATTR_NAME_ENTRIES { + quarantine = 0, + MAX_NUM_XATTR_NAMES //must be last +}; + + +/* These are special EAs that follow the content in exchangedata(2). 
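[Editorial aside, a sketch under assumptions] Given the declarations above, the quarantine EA presumably travels with the exchanged content along these lines; from_vp, to_fileid and the surrounding error handling are hypothetical, not taken from the exchange path itself:

    void  *quarantine_buf = NULL;
    size_t quarantine_len = 0;
    if (hfs_exchangedata_getxattr(from_vp, quarantine, &quarantine_buf, &quarantine_len) == 0) {
        /* ... after the fork contents have been exchanged ... */
        (void) hfs_exchangedata_setxattr(hfsmp, to_fileid, quarantine,
                                         quarantine_buf, quarantine_len);
    }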
*/ +const char *XATTR_NAMES [MAX_NUM_XATTR_NAMES] = { "com.apple.quarantine" }; + +#define MAX_EXCHANGE_EA_SIZE 4096 + +#if HFS_COMPRESSION +static int hfs_move_compressed(cnode_t *from_vp, cnode_t *to_vp); +#endif + +decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp); + +#if FIFO +static int hfsfifo_read(struct vnop_read_args *); +static int hfsfifo_write(struct vnop_write_args *); +static int hfsfifo_close(struct vnop_close_args *); + +extern int (**fifo_vnodeop_p)(void *); +#endif /* FIFO */ + +int hfs_vnop_close(struct vnop_close_args*); +int hfs_vnop_exchange(struct vnop_exchange_args*); +int hfs_vnop_fsync(struct vnop_fsync_args*); +int hfs_vnop_mkdir(struct vnop_mkdir_args*); +int hfs_vnop_mknod(struct vnop_mknod_args*); +int hfs_vnop_getattr(struct vnop_getattr_args*); +int hfs_vnop_open(struct vnop_open_args*); +int hfs_vnop_readdir(struct vnop_readdir_args*); +int hfs_vnop_rename(struct vnop_rename_args*); +int hfs_vnop_renamex(struct vnop_renamex_args*); +int hfs_vnop_rmdir(struct vnop_rmdir_args*); +int hfs_vnop_symlink(struct vnop_symlink_args*); +int hfs_vnop_setattr(struct vnop_setattr_args*); +int hfs_vnop_readlink(struct vnop_readlink_args *); +int hfs_vnop_pathconf(struct vnop_pathconf_args *); +int hfs_vnop_mmap(struct vnop_mmap_args *ap); +int hfsspec_read(struct vnop_read_args *); +int hfsspec_write(struct vnop_write_args *); +int hfsspec_close(struct vnop_close_args *); + +/* Options for hfs_removedir and hfs_removefile */ +#define HFSRM_SKIP_RESERVE 0x01 + + + +/***************************************************************************** +* +* Common Operations on vnodes +* +*****************************************************************************/ + +/* + * Is the given cnode either the .journal or .journal_info_block file on + * a volume with an active journal? Many VNOPs use this to deny access + * to those files. + * + * Note: the .journal file on a volume with an external journal still + * returns true here, even though it does not actually hold the contents + * of the volume's journal. + */ +bool +hfs_is_journal_file(struct hfsmount *hfsmp, struct cnode *cp) +{ + if (hfsmp->jnl != NULL && + (cp->c_fileid == hfsmp->hfs_jnlinfoblkid || + cp->c_fileid == hfsmp->hfs_jnlfileid)) { + return true; + } else { + return false; + } +} + +/* + * Create a regular file. + */ +int +hfs_vnop_create(struct vnop_create_args *ap) +{ + /* + * We leave handling of certain race conditions here to the caller + * which will have a better understanding of the semantics it + * requires. For example, if it turns out that the file exists, + * it would be wrong of us to return a reference to the existing + * file because the caller might not want that and it would be + * misleading to suggest the file had been created when it hadn't + * been. Note that our NFS server code does not set the + * VA_EXCLUSIVE flag so you cannot assume that callers don't want + * EEXIST errors if it's not set. The common case, where users + * are calling open with the O_CREAT mode, is handled in VFS; when + * we return EEXIST, it will loop and do the look-up again. + */ + return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); +} + +/* + * Make device special file. 
+ */ +int +hfs_vnop_mknod(struct vnop_mknod_args *ap) +{ + struct vnode_attr *vap = ap->a_vap; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct cnode *cp; + int error; + + if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) { + return (ENOTSUP); + } + + /* Create the vnode */ + error = hfs_makenode(dvp, vpp, ap->a_cnp, vap, ap->a_context); + if (error) + return (error); + + cp = VTOC(*vpp); + cp->c_touch_acctime = TRUE; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + + if ((vap->va_rdev != VNOVAL) && + (vap->va_type == VBLK || vap->va_type == VCHR)) + cp->c_rdev = vap->va_rdev; + + return (0); +} + +#if HFS_COMPRESSION +/* + * hfs_ref_data_vp(): returns the data fork vnode for a given cnode. + * In the (hopefully rare) case where the data fork vnode is not + * present, it will use hfs_vget() to create a new vnode for the + * data fork. + * + * NOTE: If successful and a vnode is returned, the caller is responsible + * for releasing the returned vnode with vnode_rele(). + */ +static int +hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock) +{ + int vref = 0; + + if (!data_vp || !cp) /* sanity check incoming parameters */ + return EINVAL; + + /* maybe we should take the hfs cnode lock here, and if so, use the skiplock parameter to tell us not to */ + + if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + struct vnode *c_vp = cp->c_vp; + if (c_vp) { + /* we already have a data vnode */ + *data_vp = c_vp; + vref = vnode_ref(*data_vp); + if (!skiplock) hfs_unlock(cp); + if (vref == 0) { + return 0; + } + return EINVAL; + } + /* no data fork vnode in the cnode, so ask hfs for one. */ + + if (!cp->c_rsrc_vp) { + /* if we don't have either a c_vp or c_rsrc_vp, we can't really do anything useful */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; + } + + if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1, 0) && + 0 != data_vp) { + vref = vnode_ref(*data_vp); + vnode_put(*data_vp); + if (!skiplock) hfs_unlock(cp); + if (vref == 0) { + return 0; + } + return EINVAL; + } + /* there was an error getting the vnode */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; +} + +/* + * hfs_lazy_init_decmpfs_cnode(): returns the decmpfs_cnode for a cnode, + * allocating it if necessary; returns NULL if there was an allocation error. + * function is non-static so that it can be used from the FCNTL handler. + */ +decmpfs_cnode * +hfs_lazy_init_decmpfs_cnode(struct cnode *cp) +{ + if (!cp->c_decmp) { + decmpfs_cnode *dp = decmpfs_cnode_alloc(); + decmpfs_cnode_init(dp); + if (!OSCompareAndSwapPtr(NULL, dp, (void * volatile *)&cp->c_decmp)) { + /* another thread got here first, so free the decmpfs_cnode we allocated */ + decmpfs_cnode_destroy(dp); + decmpfs_cnode_free(dp); + } + } + + return cp->c_decmp; +} + +/* + * hfs_file_is_compressed(): returns 1 if the file is compressed, and 0 (zero) if not. + * if the file's compressed flag is set, makes sure that the decmpfs_cnode field + * is allocated by calling hfs_lazy_init_decmpfs_cnode(), then makes sure it is populated, + * or else fills it in via the decmpfs_file_is_compressed() function. + */ +int +hfs_file_is_compressed(struct cnode *cp, int skiplock) +{ + int ret = 0; + + /* fast check to see if file is compressed. 
If flag is clear, just answer no */ + if (!(cp->c_bsdflags & UF_COMPRESSED)) { + return 0; + } + + decmpfs_cnode *dp = hfs_lazy_init_decmpfs_cnode(cp); + if (!dp) { + /* error allocating a decmpfs cnode, treat the file as uncompressed */ + return 0; + } + + /* flag was set, see if the decmpfs_cnode state is valid (zero == invalid) */ + uint32_t decmpfs_state = decmpfs_cnode_get_vnode_state(dp); + switch(decmpfs_state) { + case FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: /* treat decompressing files as if they are compressed */ + return 1; + case FILE_IS_NOT_COMPRESSED: + return 0; + /* otherwise the state is not cached yet */ + } + + /* decmpfs hasn't seen this file yet, so call decmpfs_file_is_compressed() to init the decmpfs_cnode struct */ + struct vnode *data_vp = NULL; + if (0 == hfs_ref_data_vp(cp, &data_vp, skiplock)) { + if (data_vp) { + ret = decmpfs_file_is_compressed(data_vp, VTOCMP(data_vp)); // fill in decmpfs_cnode + vnode_rele(data_vp); + } + } + return ret; +} + +/* hfs_uncompressed_size_of_compressed_file() - get the uncompressed size of the file. + * if the caller has passed a valid vnode (has a ref count > 0), then hfsmp and fid are not required. + * if the caller doesn't have a vnode, pass NULL in vp, and pass valid hfsmp and fid. + * files size is returned in size (required) + * if the indicated file is a directory (or something that doesn't have a data fork), then this call + * will return an error and the caller should fall back to treating the item as an uncompressed file + */ +int +hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock) +{ + int ret = 0; + int putaway = 0; /* flag to remember if we used hfs_vget() */ + + if (!size) { + return EINVAL; /* no place to put the file size */ + } + + if (NULL == vp) { + if (!hfsmp || !fid) { /* make sure we have the required parameters */ + return EINVAL; + } + if (0 != hfs_vget(hfsmp, fid, &vp, skiplock, 0)) { /* vnode is null, use hfs_vget() to get it */ + vp = NULL; + } else { + putaway = 1; /* note that hfs_vget() was used to aquire the vnode */ + } + } + /* this double check for compression (hfs_file_is_compressed) + * ensures the cached size is present in case decmpfs hasn't + * encountered this node yet. + */ + if (vp) { + if (hfs_file_is_compressed(VTOC(vp), skiplock) ) { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + } else if (VTOCMP(vp)) { + uint32_t cmp_type = decmpfs_cnode_cmp_type(VTOCMP(vp)); + + if (cmp_type == DATALESS_CMPFS_TYPE) { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + ret = 0; + } else if (cmp_type >= CMP_MAX && VTOC(vp)->c_datafork) { + // if we don't recognize this type, just use the real data fork size + *size = VTOC(vp)->c_datafork->ff_size; + ret = 0; + } else + ret = EINVAL; + } else + ret = EINVAL; + } + + if (putaway) { /* did we use hfs_vget() to get this vnode? 
*/ + vnode_put(vp); /* if so, release it and set it to null */ + vp = NULL; + } + return ret; +} + +int +hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_rsrc(ctx, cp->c_decmp); +} + +int +hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_xattr(ctx, cp->c_decmp, name); +} +#endif /* HFS_COMPRESSION */ + +/* + * Open a file/directory. + */ +int +hfs_vnop_open(struct vnop_open_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct filefork *fp; + struct timeval tv; + int error; + static int past_bootup = 0; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + +#if CONFIG_PROTECT + error = cp_handle_open(vp, ap->a_mode); + if (error) + return error; +#endif + +#if HFS_COMPRESSION + if (ap->a_mode & FWRITE) { + /* open for write */ + if ( hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + /* opening a compressed file for write, so convert it to decompressed */ + struct vnode *data_vp = NULL; + error = hfs_ref_data_vp(cp, &data_vp, 1); /* 1 == don't take the cnode lock */ + if (0 == error) { + if (data_vp) { + error = decmpfs_decompress_file(data_vp, VTOCMP(data_vp), -1, 1, 0); + vnode_rele(data_vp); + } else { + error = EINVAL; + } + } + if (error != 0) + return error; + } + } else { + /* open for read */ + if (hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + if (VNODE_IS_RSRC(vp)) { + /* opening the resource fork of a compressed file, so nothing to do */ + } else { + /* opening a compressed file for read, make sure it validates */ + error = decmpfs_validate_compressed_file(vp, VTOCMP(vp)); + if (error != 0) + return error; + } + } + } +#endif + + /* + * Files marked append-only must be opened for appending. + */ + if ((cp->c_bsdflags & APPEND) && !vnode_isdir(vp) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + + if (vnode_issystem(vp)) + return (EBUSY); /* file is in use by the kernel */ + + /* Don't allow journal to be opened externally. 
*/ + if (hfs_is_journal_file(hfsmp, cp)) + return (EPERM); + + bool have_lock = false; + +#if CONFIG_PROTECT + if (ISSET(ap->a_mode, FENCRYPTED) && cp->c_cpentry && vnode_isreg(vp)) { + bool have_trunc_lock = false; + +#if HFS_CONFIG_KEY_ROLL + again: +#endif + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + return error; + } + + have_lock = true; + + if (cp->c_cpentry->cp_raw_open_count + 1 + < cp->c_cpentry->cp_raw_open_count) { + // Overflow; too many raw opens on this file + hfs_unlock(cp); + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + return ENFILE; + } + +#if HFS_CONFIG_KEY_ROLL + if (cp_should_auto_roll(hfsmp, cp->c_cpentry)) { + if (!have_trunc_lock) { + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0); + have_trunc_lock = true; + goto again; + } + + error = hfs_key_roll_start(cp); + if (error) { + hfs_unlock(cp); + hfs_unlock_truncate(cp, 0); + return error; + } + } +#endif + + if (have_trunc_lock) + hfs_unlock_truncate(cp, 0); + + ++cp->c_cpentry->cp_raw_open_count; + } +#endif + + if (ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) + || !vnode_isreg(vp) +#if NAMEDSTREAMS + || vnode_isnamedstream(vp) +#endif + || !hfsmp->jnl || vnode_isinuse(vp, 0)) { + +#if CONFIG_PROTECT + if (have_lock) + hfs_unlock(cp); +#endif + + return (0); + } + + if (!have_lock && (error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) + return (error); + +#if QUOTA + /* If we're going to write to the file, initialize quotas. */ + if ((ap->a_mode & FWRITE) && (hfsmp->hfs_flags & HFS_QUOTAS)) + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + + /* + * On the first (non-busy) open of a fragmented + * file attempt to de-frag it, if it's less than hfs_defrag_max bytes. + * That field is initially set to 20MB. + */ + fp = VTOF(vp); + if (fp->ff_blocks && + fp->ff_extents[7].blockCount != 0 && + fp->ff_size <= hfsmp->hfs_defrag_max) { + + int no_mods = 0; + struct timeval now; + /* + * Wait until system bootup is done (3 min). + * And don't relocate a file that's been modified + * within the past minute -- this can lead to + * system thrashing. + */ + + if (hfsmp->hfs_defrag_nowait) { + /* If this is toggled, then issue the defrag if appropriate */ + past_bootup = 1; + no_mods = 1; + } + + if (!past_bootup) { + microuptime(&tv); + if (tv.tv_sec > (60*3)) { + past_bootup = 1; + } + } + + microtime(&now); + if ((now.tv_sec - cp->c_mtime) > 60) { + no_mods = 1; + } + + if (past_bootup && no_mods) { + (void) hfs_relocate(vp, hfsmp->nextAllocation + 4096, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); + } + } + + hfs_unlock(cp); + + return (0); +} + + +/* + * Close a file/directory. + */ +int +hfs_vnop_close(struct vnop_close_args *ap) +{ + register struct vnode *vp = ap->a_vp; + register struct cnode *cp; + struct proc *p = vfs_context_proc(ap->a_context); + struct hfsmount *hfsmp; + int busy; + int tooktrunclock = 0; + int knownrefs = 0; + + if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) + return (0); + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + +#if CONFIG_PROTECT + if (cp->c_cpentry && ISSET(ap->a_fflag, FENCRYPTED) && vnode_isreg(vp)) { + hfs_assert(cp->c_cpentry->cp_raw_open_count > 0); + --cp->c_cpentry->cp_raw_open_count; + } +#endif + + /* + * If the rsrc fork is a named stream, it can cause the data fork to + * stay around, preventing de-allocation of these blocks. + * Do checks for truncation on close. Purge extra extents if they exist. 
+ * Make sure the vp is not a directory, and that it has a resource fork, + * and that resource fork is also a named stream. + */ + + if ((vnode_vtype(vp) == VREG) && (cp->c_rsrc_vp) + && (vnode_isnamedstream(cp->c_rsrc_vp))) { + uint32_t blks; + + blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); + /* + * If there are extra blocks and there are only 2 refs on + * this vp (ourselves + rsrc fork holding ref on us), go ahead + * and try to truncate. + */ + if ((blks < VTOF(vp)->ff_blocks) && (!vnode_isinuse(vp, 2))) { + // release cnode lock; must acquire truncate lock BEFORE cnode lock + hfs_unlock(cp); + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + tooktrunclock = 1; + + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + // bail out if we can't re-acquire cnode lock + return 0; + } + // now re-test to make sure it's still valid + if (cp->c_rsrc_vp) { + knownrefs = 1 + vnode_isnamedstream(cp->c_rsrc_vp); + if (!vnode_isinuse(vp, knownrefs)){ + // now we can truncate the file, if necessary + blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); + if (blks < VTOF(vp)->ff_blocks){ + (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, + 0, ap->a_context); + } + } + } + } + } + + + // if we froze the fs and we're exiting, then "thaw" the fs + if (hfsmp->hfs_freeze_state == HFS_FROZEN + && hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { + hfs_thaw(hfsmp, p); + } + + busy = vnode_isinuse(vp, 1); + + if (busy) { + hfs_touchtimes(VTOHFS(vp), cp); + } + if (vnode_isdir(vp)) { + hfs_reldirhints(cp, busy); + } else if (vnode_issystem(vp) && !busy) { + vnode_recycle(vp); + } + + if (tooktrunclock){ + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + hfs_unlock(cp); + + if (ap->a_fflag & FWASWRITTEN) { + hfs_sync_ejectable(hfsmp); + } + + return (0); +} + +static bool hfs_should_generate_document_id(hfsmount_t *hfsmp, cnode_t *cp) +{ + return (!ISSET(hfsmp->hfs_flags, HFS_READ_ONLY) + && ISSET(cp->c_bsdflags, UF_TRACKED) + && cp->c_desc.cd_cnid != kHFSRootFolderID + && (S_ISDIR(cp->c_mode) || S_ISREG(cp->c_mode) || S_ISLNK(cp->c_mode))); +} + +/* + * Get basic attributes. 
+ */ +int +hfs_vnop_getattr(struct vnop_getattr_args *ap) +{ +#define VNODE_ATTR_TIMES \ + (VNODE_ATTR_va_access_time|VNODE_ATTR_va_change_time|VNODE_ATTR_va_modify_time) +#define VNODE_ATTR_AUTH \ + (VNODE_ATTR_va_mode | VNODE_ATTR_va_uid | VNODE_ATTR_va_gid | \ + VNODE_ATTR_va_flags | VNODE_ATTR_va_acl) + + struct vnode *vp = ap->a_vp; + struct vnode_attr *vap = ap->a_vap; + struct vnode *rvp = NULLVP; + struct hfsmount *hfsmp; + struct cnode *cp; + uint64_t data_size; + enum vtype v_type; + int error = 0; + cp = VTOC(vp); + +#if HFS_COMPRESSION + /* we need to inspect the decmpfs state of the file before we take the hfs cnode lock */ + int compressed = 0; + int hide_size = 0; + off_t uncompressed_size = -1; + if (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_alloc) || VATTR_IS_ACTIVE(vap, va_data_alloc) || VATTR_IS_ACTIVE(vap, va_total_size)) { + /* we only care about whether the file is compressed if asked for the uncompressed size */ + if (VNODE_IS_RSRC(vp)) { + /* if it's a resource fork, decmpfs may want us to hide the size */ + hide_size = hfs_hides_rsrc(ap->a_context, cp, 0); + } else { + /* if it's a data fork, we need to know if it was compressed so we can report the uncompressed size */ + compressed = hfs_file_is_compressed(cp, 0); + } + if ((VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) { + // if it's compressed + if (compressed || (!VNODE_IS_RSRC(vp) && cp->c_decmp && decmpfs_cnode_cmp_type(cp->c_decmp) >= CMP_MAX)) { + if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } else { + // fake that it's compressed + compressed = 1; + } + } + } + } +#endif + + /* + * Shortcut for vnode_authorize path. Each of the attributes + * in this set is updated atomically so we don't need to take + * the cnode lock to access them. + */ + if ((vap->va_active & ~VNODE_ATTR_AUTH) == 0) { + /* Make sure file still exists. */ + if (cp->c_flag & C_NOEXISTS) + return (ENOENT); + + vap->va_uid = cp->c_uid; + vap->va_gid = cp->c_gid; + vap->va_mode = cp->c_mode; + vap->va_flags = cp->c_bsdflags; + vap->va_supported |= VNODE_ATTR_AUTH & ~VNODE_ATTR_va_acl; + + if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } + + return (0); + } + + hfsmp = VTOHFS(vp); + v_type = vnode_vtype(vp); + + if (VATTR_IS_ACTIVE(vap, va_document_id)) { + uint32_t document_id; + + if (cp->c_desc.cd_cnid == kHFSRootFolderID) + document_id = kHFSRootFolderID; + else { + /* + * This is safe without a lock because we're just reading + * a 32 bit aligned integer which should be atomic on all + * platforms we support. 
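[Editorial aside] The code that follows is a check / generate / re-check pattern; a condensed sketch of its shape, using the same names as the body below and with error handling elided:

    uint32_t document_id = hfs_get_document_id(cp);             /* unlocked, possibly racy read */
    if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) {
        uint32_t new_id;
        hfs_generate_document_id(hfsmp, &new_id);                /* may block; no cnode lock held */
        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (!hfs_get_document_id(cp))                            /* another thread may have raced us */
            cp->c_attr.ca_finderextendeddirinfo.document_id = document_id = new_id;
        hfs_unlock(cp);
    }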
+ */ + document_id = hfs_get_document_id(cp); + + if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) { + uint32_t new_document_id; + + error = hfs_generate_document_id(hfsmp, &new_document_id); + if (error) + return error; + + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return error; + + bool want_docid_fsevent = false; + + // Need to check again now that we have the lock + document_id = hfs_get_document_id(cp); + if (!document_id && hfs_should_generate_document_id(hfsmp, cp)) { + cp->c_attr.ca_finderextendeddirinfo.document_id = document_id = new_document_id; + want_docid_fsevent = true; + SET(cp->c_flag, C_MODIFIED); + } + + hfs_unlock(cp); + + if (want_docid_fsevent) { + add_fsevent(FSE_DOCID_CHANGED, ap->a_context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, document_id, + FSE_ARG_DONE); + + if (need_fsevent(FSE_STAT_CHANGED, vp)) { + add_fsevent(FSE_STAT_CHANGED, ap->a_context, + FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + } + } + } + + vap->va_document_id = document_id; + VATTR_SET_SUPPORTED(vap, va_document_id); + } + + /* + * If time attributes are requested and we have cnode times + * that require updating, then acquire an exclusive lock on + * the cnode before updating the times. Otherwise we can + * just acquire a shared lock. + */ + if ((vap->va_active & VNODE_ATTR_TIMES) && + (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime)) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) + return (error); + hfs_touchtimes(hfsmp, cp); + + // downgrade to a shared lock since that's all we need from here on out + cp->c_lockowner = HFS_SHARED_OWNER; + lck_rw_lock_exclusive_to_shared(&cp->c_rwlock); + + } else if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return (error); + } + + if (v_type == VDIR) { + data_size = (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE; + + if (VATTR_IS_ACTIVE(vap, va_nlink)) { + int nlink; + + /* + * For directories, the va_nlink is esentially a count + * of the ".." references to a directory plus the "." + * reference and the directory itself. So for HFS+ this + * becomes the sub-directory count plus two. + * + * In the absence of a sub-directory count we use the + * directory's item count. This will be too high in + * most cases since it also includes files. + */ + if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) && + (cp->c_attr.ca_recflags & kHFSHasFolderCountMask)) + nlink = cp->c_attr.ca_dircount; /* implied ".." entries */ + else + nlink = cp->c_entries; + + /* Account for ourself and our "." entry */ + nlink += 2; + /* Hide our private directories. */ + if (cp->c_cnid == kHFSRootFolderID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) { + --nlink; + } + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) { + --nlink; + } + } + VATTR_RETURN(vap, va_nlink, (u_int64_t)nlink); + } + if (VATTR_IS_ACTIVE(vap, va_nchildren)) { + int entries; + + entries = cp->c_entries; + /* Hide our private files and directories. 
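[Editorial aside] A worked example, with made-up counts, of the directory link-count math above and the child-count adjustment that follows:

    int nlink = 10;   /* ca_dircount: real sub-directories of the root folder */
    nlink += 2;       /* account for "." and the implied ".." references */
    nlink -= 2;       /* both private hard-link directories exist, so hide them */
    /* getattr reports va_nlink == 10; va_nchildren is reduced the same way, and by two more
       entries for the journal files when the volume is journaled. */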
*/ + if (cp->c_cnid == kHFSRootFolderID) { + if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid != 0) + --entries; + if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid != 0) + --entries; + if (hfsmp->jnl || ((hfsmp->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) + entries -= 2; /* hide the journal files */ + } + VATTR_RETURN(vap, va_nchildren, entries); + } + /* + * The va_dirlinkcount is the count of real directory hard links. + * (i.e. its not the sum of the implied "." and ".." references) + */ + if (VATTR_IS_ACTIVE(vap, va_dirlinkcount)) { + VATTR_RETURN(vap, va_dirlinkcount, (uint32_t)cp->c_linkcount); + } + } else /* !VDIR */ { + data_size = VCTOF(vp, cp)->ff_size; + + VATTR_RETURN(vap, va_nlink, (u_int64_t)cp->c_linkcount); + if (VATTR_IS_ACTIVE(vap, va_data_alloc)) { + u_int64_t blocks; + +#if HFS_COMPRESSION + if (hide_size) { + VATTR_RETURN(vap, va_data_alloc, 0); + } else if (compressed) { + /* for compressed files, we report all allocated blocks as belonging to the data fork */ + blocks = cp->c_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } + else +#endif + { + blocks = VCTOF(vp, cp)->ff_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } + } + } + + /* conditional because 64-bit arithmetic can be expensive */ + if (VATTR_IS_ACTIVE(vap, va_total_size)) { + if (v_type == VDIR) { + VATTR_RETURN(vap, va_total_size, (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE); + } else { + u_int64_t total_size = ~0ULL; + struct cnode *rcp; +#if HFS_COMPRESSION + if (hide_size) { + /* we're hiding the size of this file, so just return 0 */ + total_size = 0; + } else if (compressed) { + if (uncompressed_size == -1) { + /* + * We failed to get the uncompressed size above, + * so we'll fall back to the standard path below + * since total_size is still -1 + */ + } else { + /* use the uncompressed size we fetched above */ + total_size = uncompressed_size; + } + } +#endif + if (total_size == ~0ULL) { + if (cp->c_datafork) { + total_size = cp->c_datafork->ff_size; + } + + if (cp->c_blocks - VTOF(vp)->ff_blocks) { + /* We deal with rsrc fork vnode iocount at the end of the function */ + error = hfs_vgetrsrc(hfsmp, vp, &rvp); + if (error) { + /* + * Note that we call hfs_vgetrsrc with error_on_unlinked + * set to FALSE. This is because we may be invoked via + * fstat() on an open-unlinked file descriptor and we must + * continue to support access to the rsrc fork until it disappears. + * The code at the end of this function will be + * responsible for releasing the iocount generated by + * hfs_vgetrsrc. This is because we can't drop the iocount + * without unlocking the cnode first. + */ + goto out; + } + + rcp = VTOC(rvp); + if (rcp && rcp->c_rsrcfork) { + total_size += rcp->c_rsrcfork->ff_size; + } + } + } + + VATTR_RETURN(vap, va_total_size, total_size); + } + } + if (VATTR_IS_ACTIVE(vap, va_total_alloc)) { + if (v_type == VDIR) { + VATTR_RETURN(vap, va_total_alloc, 0); + } else { + VATTR_RETURN(vap, va_total_alloc, (u_int64_t)cp->c_blocks * (u_int64_t)hfsmp->blockSize); + } + } + + /* + * If the VFS wants extended security data, and we know that we + * don't have any (because it never told us it was setting any) + * then we can return the supported bit and no data. If we do + * have extended security, we can just leave the bit alone and + * the VFS will use the fallback path to fetch it. 
+ */ + if (VATTR_IS_ACTIVE(vap, va_acl)) { + if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { + vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } + } + + vap->va_access_time.tv_sec = cp->c_atime; + vap->va_access_time.tv_nsec = 0; + vap->va_create_time.tv_sec = cp->c_itime; + vap->va_create_time.tv_nsec = 0; + vap->va_modify_time.tv_sec = cp->c_mtime; + vap->va_modify_time.tv_nsec = 0; + vap->va_change_time.tv_sec = cp->c_ctime; + vap->va_change_time.tv_nsec = 0; + vap->va_backup_time.tv_sec = cp->c_btime; + vap->va_backup_time.tv_nsec = 0; + + /* See if we need to emit the date added field to the user */ + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + u_int32_t dateadded = hfs_get_dateadded (cp); + if (dateadded) { + vap->va_addedtime.tv_sec = dateadded; + vap->va_addedtime.tv_nsec = 0; + VATTR_SET_SUPPORTED (vap, va_addedtime); + } + } + + /* XXX is this really a good 'optimal I/O size'? */ + vap->va_iosize = hfsmp->hfs_logBlockSize; + vap->va_uid = cp->c_uid; + vap->va_gid = cp->c_gid; + vap->va_mode = cp->c_mode; + vap->va_flags = cp->c_bsdflags; + + /* + * Exporting file IDs from HFS Plus: + * + * For "normal" files the c_fileid is the same value as the + * c_cnid. But for hard link files, they are different - the + * c_cnid belongs to the active directory entry (ie the link) + * and the c_fileid is for the actual inode (ie the data file). + * + * The stat call (getattr) uses va_fileid and the Carbon APIs, + * which are hardlink-ignorant, will ask for va_linkid. + */ + vap->va_fileid = (u_int64_t)cp->c_fileid; + /* + * We need to use the origin cache for both hardlinked files + * and directories. Hardlinked directories have multiple cnids + * and parents (one per link). Hardlinked files also have their + * own parents and link IDs separate from the indirect inode number. + * If we don't use the cache, we could end up vending the wrong ID + * because the cnode will only reflect the link that was looked up most recently. + */ + if (cp->c_flag & C_HARDLINK) { + vap->va_linkid = (u_int64_t)hfs_currentcnid(cp); + vap->va_parentid = (u_int64_t)hfs_currentparent(cp, /* have_lock: */ true); + } else { + vap->va_linkid = (u_int64_t)cp->c_cnid; + vap->va_parentid = (u_int64_t)cp->c_parentcnid; + } + + vap->va_fsid = hfsmp->hfs_raw_dev; + if (VATTR_IS_ACTIVE(vap, va_devid)) { + VATTR_RETURN(vap, va_devid, hfsmp->hfs_raw_dev); + } + vap->va_filerev = 0; + vap->va_encoding = cp->c_encoding; + vap->va_rdev = (v_type == VBLK || v_type == VCHR) ? cp->c_rdev : 0; +#if HFS_COMPRESSION + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if (hide_size) + vap->va_data_size = 0; + else if (compressed) { + if (uncompressed_size == -1) { + /* failed to get the uncompressed size above, so just return data_size */ + vap->va_data_size = data_size; + } else { + /* use the uncompressed size we fetched above */ + vap->va_data_size = uncompressed_size; + } + } else + vap->va_data_size = data_size; + VATTR_SET_SUPPORTED(vap, va_data_size); + } +#else + vap->va_data_size = data_size; + vap->va_supported |= VNODE_ATTR_va_data_size; +#endif + +#if CONFIG_PROTECT + if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { + vap->va_dataprotect_class = cp->c_cpentry ? CP_CLASS(cp->c_cpentry->cp_pclass) : 0; + VATTR_SET_SUPPORTED(vap, va_dataprotect_class); + } +#endif + if (VATTR_IS_ACTIVE(vap, va_write_gencount)) { + if (ubc_is_mapped_writable(vp)) { + /* + * Return 0 to the caller to indicate the file may be + * changing. 
There is no need for us to increment the + * generation counter here because it gets done as part of + * page-out and also when the file is unmapped (to account + * for changes we might not have seen). + */ + vap->va_write_gencount = 0; + } else { + vap->va_write_gencount = hfs_get_gencount(cp); + } + + VATTR_SET_SUPPORTED(vap, va_write_gencount); + } + + /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ + vap->va_supported |= VNODE_ATTR_va_access_time | + VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | + VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time | + VNODE_ATTR_va_iosize | VNODE_ATTR_va_uid | + VNODE_ATTR_va_gid | VNODE_ATTR_va_mode | + VNODE_ATTR_va_flags |VNODE_ATTR_va_fileid | + VNODE_ATTR_va_linkid | VNODE_ATTR_va_parentid | + VNODE_ATTR_va_fsid | VNODE_ATTR_va_filerev | + VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev; + + /* If this is the root, let VFS to find out the mount name, which + * may be different from the real name. Otherwise, we need to take care + * for hardlinked files, which need to be looked up, if necessary + */ + if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) { + struct cat_desc linkdesc; + int lockflags; + int uselinkdesc = 0; + cnid_t nextlinkid = 0; + cnid_t prevlinkid = 0; + + /* Get the name for ATTR_CMN_NAME. We need to take special care for hardlinks + * here because the info. for the link ID requested by getattrlist may be + * different than what's currently in the cnode. This is because the cnode + * will be filled in with the information for the most recent link ID that went + * through namei/lookup(). If there are competing lookups for hardlinks that point + * to the same inode, one (or more) getattrlists could be vended incorrect name information. + * Also, we need to beware of open-unlinked files which could have a namelen of 0. + */ + + if ((cp->c_flag & C_HARDLINK) && + ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { + /* + * If we have no name and our link ID is the raw inode number, then we may + * have an open-unlinked file. Go to the next link in this case. + */ + if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { + if ((error = hfs_lookup_siblinglinks(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){ + goto out; + } + } + else { + /* just use link obtained from vap above */ + nextlinkid = vap->va_linkid; + } + + /* We need to probe the catalog for the descriptor corresponding to the link ID + * stored in nextlinkid. Note that we don't know if we have the exclusive lock + * for the cnode here, so we can't just update the descriptor. Instead, + * we should just store the descriptor's value locally and then use it to pass + * out the name value as needed below. + */ + if (nextlinkid){ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error == 0) { + uselinkdesc = 1; + } + } + } + + /* By this point, we've either patched up the name above and the c_desc + * points to the correct data, or it already did, in which case we just proceed + * by copying the name into the vap. Note that we will never set va_name to + * supported if nextlinkid is never initialized. This could happen in the degenerate + * case above involving the raw inode number, where it has no nextlinkid. In this case + * we will simply not mark the name bit as supported. 
+ */ + if (uselinkdesc) { + strlcpy(vap->va_name, (const char*) linkdesc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + cat_releasedesc(&linkdesc); + } + else if (cp->c_desc.cd_namelen) { + strlcpy(vap->va_name, (const char*) cp->c_desc.cd_nameptr, MAXPATHLEN); + VATTR_SET_SUPPORTED(vap, va_name); + } + } + +out: + hfs_unlock(cp); + /* + * We need to vnode_put the rsrc fork vnode only *after* we've released + * the cnode lock, since vnode_put can trigger an inactive call, which + * will go back into HFS and try to acquire a cnode lock. + */ + if (rvp) { + vnode_put (rvp); + } + + return (error); +} + +int +hfs_set_bsd_flags(struct hfsmount *hfsmp, struct cnode *cp, + u_int32_t new_bsd_flags, u_int32_t document_id, + vfs_context_t ctx, int *compression_changedp) +{ + u_int16_t *fdFlags; + + if ((new_bsd_flags & UF_TRACKED) && !(cp->c_bsdflags & UF_TRACKED)) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + // + // we're marking this item UF_TRACKED. if the document_id is + // not set, get a new one and put it on the file. + // + if (fip->document_id == 0) { + if (document_id != 0) { + // printf("SETATTR: assigning doc-id %d to %s (ino %d)\n", document_id, vp->v_name, cp->c_desc.cd_cnid); + fip->document_id = (uint32_t)document_id; + add_fsevent(FSE_DOCID_CHANGED, ctx, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, document_id, + FSE_ARG_DONE); + } else { + // printf("hfs: could not acquire a new document_id for %s (ino %d)\n", vp->v_name, cp->c_desc.cd_cnid); + } + } + + } else if (!(new_bsd_flags & UF_TRACKED) && (cp->c_bsdflags & UF_TRACKED)) { + // + // UF_TRACKED is being cleared so clear the document_id + // + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + if (fip->document_id) { + // printf("SETATTR: clearing doc-id %d from %s (ino %d)\n", fip->document_id, vp->v_name, cp->c_desc.cd_cnid); + add_fsevent(FSE_DOCID_CHANGED, ctx, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)0, // dst inode # + FSE_ARG_INT32, fip->document_id, // document id + FSE_ARG_DONE); + fip->document_id = 0; + cp->c_bsdflags &= ~UF_TRACKED; + } + } + +#if HFS_COMPRESSION + if ((cp->c_bsdflags ^ new_bsd_flags) & UF_COMPRESSED) { + /* + * the UF_COMPRESSED was toggled, so reset our cached compressed state + * but we don't want to actually do the update until we've released the cnode lock down below + * NOTE: turning the flag off doesn't actually decompress the file, so that we can + * turn off the flag and look at the "raw" file for debugging purposes + */ + *compression_changedp = 1; + } +#endif + + cp->c_bsdflags = new_bsd_flags; + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; + + /* + * Mirror the UF_HIDDEN flag to the invisible bit of the Finder Info. + * + * The fdFlags for files and frFlags for folders are both 8 bytes + * into the userInfo (the first 16 bytes of the Finder Info). They + * are both 16-bit fields. 
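
Because UF_HIDDEN is mirrored into the Finder invisible flag just below, either interface observes a change made through the other. A small userspace sketch of the BSD side follows, assuming the Finder invisible bit is the 0x4000 flag in the big-endian fdFlags/frFlags word described above; it is an illustration, not part of the patch.

    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(int argc, char *argv[])
    {
        struct stat st;
        const char *path = argc > 1 ? argv[1] : "/tmp/example";

        if (stat(path, &st) != 0) {
            perror("stat");
            return 1;
        }
        /* Setting UF_HIDDEN makes the setattr path OR the big-endian
         * invisible bit into byte offset 8 of the Finder info, so
         * chflags(2) and Finder stay in agreement. */
        if (chflags(path, st.st_flags | UF_HIDDEN) != 0) {
            perror("chflags");
            return 1;
        }
        return 0;
    }
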
+ */ + fdFlags = (u_int16_t *) &cp->c_finderinfo[8]; + if (new_bsd_flags & UF_HIDDEN) + *fdFlags |= OSSwapHostToBigConstInt16(kFinderInvisibleMask); + else + *fdFlags &= ~OSSwapHostToBigConstInt16(kFinderInvisibleMask); + + return 0; +} + +int +hfs_vnop_setattr(struct vnop_setattr_args *ap) +{ + struct vnode_attr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct cnode *cp = NULL; + struct hfsmount *hfsmp; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); + int error = 0; + uid_t nuid; + gid_t ngid; + time_t orig_ctime; + + orig_ctime = VTOC(vp)->c_ctime; + +#if HFS_COMPRESSION + int decmpfs_reset_state = 0; + /* + we call decmpfs_update_attributes even if the file is not compressed + because we want to update the incoming flags if the xattrs are invalid + */ + error = decmpfs_update_attributes(vp, vap); + if (error) + return error; +#endif + // + // if this is not a size-changing setattr and it is not just + // an atime update, then check for a snapshot. + // + if (!VATTR_IS_ACTIVE(vap, va_data_size) && !(vap->va_active == VNODE_ATTR_va_access_time)) { + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NSPACE_REARM_NO_ARG); + } + +#if CONFIG_PROTECT + /* + * All metadata changes should be allowed except a size-changing setattr, which + * has effects on file content and requires calling into cp_handle_vnop + * to have content protection check. + */ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + return (error); + } + } +#endif /* CONFIG_PROTECT */ + + hfsmp = VTOHFS(vp); + + /* Don't allow modification of the journal. */ + if (hfs_is_journal_file(hfsmp, VTOC(vp))) { + return (EPERM); + } + + // + // Check if we'll need a document_id and if so, get it before we lock the + // the cnode to avoid any possible deadlock with the root vnode which has + // to get locked to get the document id + // + u_int32_t document_id=0; + if (VATTR_IS_ACTIVE(vap, va_flags) && (vap->va_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16); + // + // If the document_id is not set, get a new one. It will be set + // on the file down below once we hold the cnode lock. + // + if (fip->document_id == 0) { + if (hfs_generate_document_id(hfsmp, &document_id) != 0) { + document_id = 0; + } + } + } + + + /* + * File size change request. + * We are guaranteed that this is not a directory, and that + * the filesystem object is writeable. + * + * NOTE: HFS COMPRESSION depends on the data_size being set *before* the bsd flags are updated + */ + VATTR_SET_SUPPORTED(vap, va_data_size); + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if (!vnode_isreg(vp)) { + if (vnode_isdir(vp)) { + return EISDIR; + } + //otherwise return EINVAL + return EINVAL; + } + +#if HFS_COMPRESSION + /* keep the compressed state locked until we're done truncating the file */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to lock out decmpfs state changes + * on this file while it's truncating + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + + nspace_snapshot_event(vp, orig_ctime, vap->va_data_size == 0 ? 
NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL); + + decmpfs_lock_compressed_data(dp, 1); + if (hfs_file_is_compressed(VTOC(vp), 1)) { + error = decmpfs_decompress_file(vp, dp, -1/*vap->va_data_size*/, 0, 1); + if (error != 0) { + decmpfs_unlock_compressed_data(dp, 1); + return error; + } + } +#endif + + // Take truncate lock + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + // hfs_truncate will deal with the cnode lock + error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, + 0, ap->a_context); + + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); +#if HFS_COMPRESSION + decmpfs_unlock_compressed_data(dp, 1); +#endif + if (error) + return error; + } + if (cp == NULL) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) + return (error); + cp = VTOC(vp); + } + + /* + * If it is just an access time update request by itself + * we know the request is from kernel level code, and we + * can delay it without being as worried about consistency. + * This change speeds up mmaps, in the rare case that they + * get caught behind a sync. + */ + + if (vap->va_active == VNODE_ATTR_va_access_time) { + cp->c_touch_acctime=TRUE; + goto out; + } + + + + /* + * Owner/group change request. + * We are guaranteed that the new owner/group is valid and legal. + */ + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + nuid = VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uid_t)VNOVAL; + ngid = VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (gid_t)VNOVAL; + if (((nuid != (uid_t)VNOVAL) || (ngid != (gid_t)VNOVAL)) && + ((error = hfs_chown(vp, nuid, ngid, cred, p)) != 0)) + goto out; + + /* + * Mode change request. + * We are guaranteed that the mode value is valid and that in + * conjunction with the owner and group, this change is legal. + */ + VATTR_SET_SUPPORTED(vap, va_mode); + if (VATTR_IS_ACTIVE(vap, va_mode) && + ((error = hfs_chmod(vp, (int)vap->va_mode, cred, p)) != 0)) + goto out; + + /* + * File flags change. + * We are guaranteed that only flags allowed to change given the + * current securelevel are being changed. + */ + VATTR_SET_SUPPORTED(vap, va_flags); + if (VATTR_IS_ACTIVE(vap, va_flags)) { + if ((error = hfs_set_bsd_flags(hfsmp, cp, vap->va_flags, document_id, + ap->a_context, + &decmpfs_reset_state)) != 0) { + goto out; + } + } + + /* + * Timestamp updates. + */ + VATTR_SET_SUPPORTED(vap, va_create_time); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + VATTR_SET_SUPPORTED(vap, va_backup_time); + VATTR_SET_SUPPORTED(vap, va_change_time); + if (VATTR_IS_ACTIVE(vap, va_create_time) || + VATTR_IS_ACTIVE(vap, va_access_time) || + VATTR_IS_ACTIVE(vap, va_modify_time) || + VATTR_IS_ACTIVE(vap, va_backup_time)) { + if (VATTR_IS_ACTIVE(vap, va_create_time)) + cp->c_itime = vap->va_create_time.tv_sec; + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + cp->c_atime = vap->va_access_time.tv_sec; + cp->c_touch_acctime = FALSE; + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + cp->c_mtime = vap->va_modify_time.tv_sec; + cp->c_touch_modtime = FALSE; + cp->c_touch_chgtime = TRUE; + + hfs_clear_might_be_dirty_flag(cp); + + /* + * The utimes system call can reset the modification + * time but it doesn't know about HFS create times. + * So we need to ensure that the creation time is + * always at least as old as the modification time. 
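
One visible consequence of the clamp described above: setting a modification time earlier than the creation time quietly pulls the creation time back as well. A small sketch using utimes(2); the path and the epoch timestamps are arbitrary examples.

    #include <stdio.h>
    #include <sys/time.h>

    int main(void)
    {
        /* Set both atime and mtime to the UNIX epoch.  On HFS+ the setattr
         * path notices c_mtime < c_itime and lowers the creation time to
         * match, so a file never appears to have been created after it was
         * last modified. */
        struct timeval times[2] = {
            { .tv_sec = 0, .tv_usec = 0 },   /* access time */
            { .tv_sec = 0, .tv_usec = 0 },   /* modification time */
        };

        if (utimes("/tmp/example-file", times) != 0) {
            perror("utimes");
            return 1;
        }
        return 0;
    }
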
+ */ + if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) && + (cp->c_cnid != kHFSRootFolderID) && + !VATTR_IS_ACTIVE(vap, va_create_time) && + (cp->c_mtime < cp->c_itime)) { + cp->c_itime = cp->c_mtime; + } + } + if (VATTR_IS_ACTIVE(vap, va_backup_time)) + cp->c_btime = vap->va_backup_time.tv_sec; + cp->c_flag |= C_MINOR_MOD; + } + + // Set the date added time + VATTR_SET_SUPPORTED(vap, va_addedtime); + if (VATTR_IS_ACTIVE(vap, va_addedtime)) { + hfs_write_dateadded(&cp->c_attr, vap->va_addedtime.tv_sec); + cp->c_flag &= ~C_NEEDS_DATEADDED; + cp->c_touch_chgtime = true; + } + + /* + * Set name encoding. + */ + VATTR_SET_SUPPORTED(vap, va_encoding); + if (VATTR_IS_ACTIVE(vap, va_encoding)) { + cp->c_encoding = vap->va_encoding; + cp->c_flag |= C_MODIFIED; + hfs_setencodingbits(hfsmp, cp->c_encoding); + } + + if ((error = hfs_update(vp, 0)) != 0) + goto out; + +out: + if (cp) { + /* Purge origin cache for cnode, since caller now has correct link ID for it + * We purge it here since it was acquired for us during lookup, and we no longer need it. + */ + if ((cp->c_flag & C_HARDLINK) && (vnode_vtype(vp) != VDIR)){ + hfs_relorigin(cp, 0); + } + + hfs_unlock(cp); +#if HFS_COMPRESSION + if (decmpfs_reset_state) { + /* + * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode + * but don't do it while holding the hfs cnode lock + */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes + * on this file if it's locked + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + } + +#if CONFIG_PROTECT + VATTR_SET_SUPPORTED(vap, va_dataprotect_class); + if (!error && VATTR_IS_ACTIVE(vap, va_dataprotect_class)) + error = cp_vnode_setclass(vp, vap->va_dataprotect_class); +#endif + + return (error); +} + + +/* + * Change the mode on a file. + * cnode must be locked before calling. + */ +int +hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struct proc *p) +{ + register struct cnode *cp = VTOC(vp); + + if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord) + return (0); + + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(VTOHFS(vp), cp)) { + return EPERM; + } + +#if OVERRIDE_UNKNOWN_PERMISSIONS + if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS) { + return (0); + }; +#endif + + mode_t new_mode = (cp->c_mode & ~ALLPERMS) | (mode & ALLPERMS); + if (new_mode != cp->c_mode) { + cp->c_mode = new_mode; + cp->c_flag |= C_MINOR_MOD; + } + cp->c_touch_chgtime = TRUE; + return (0); +} + + +int +hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags) +{ + struct cnode *cp = VTOC(vp); + int retval = 0; + int is_member; + + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + switch (vnode_vtype(vp)) { + case VDIR: + case VLNK: + case VREG: + if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) + return (EROFS); + break; + default: + break; + } + + /* If immutable bit set, nobody gets to write it. */ + if (considerFlags && (cp->c_bsdflags & IMMUTABLE)) + return (EPERM); + + /* Otherwise, user id 0 always gets access. 
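
The write-access check in this function is the classic Unix cascade: superuser first, then the owner bit, then the group bit, then "other", with only the first matching class consulted. A condensed userspace sketch of that decision order follows (supplementary-group membership and the immutable/read-only checks are omitted for brevity); it is an illustration, not the kernel code.

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Same ordering as the checks in this function; the first matching
     * class is the only one consulted. */
    static bool may_write(const struct stat *st, uid_t uid, gid_t gid)
    {
        if (uid == 0)
            return true;                          /* user id 0 always gets access */
        if (uid == st->st_uid)
            return (st->st_mode & S_IWUSR) != 0;  /* owner class */
        if (gid == st->st_gid)
            return (st->st_mode & S_IWGRP) != 0;  /* group class */
        return (st->st_mode & S_IWOTH) != 0;      /* everyone else */
    }

    int main(int argc, char *argv[])
    {
        struct stat st;

        if (argc < 2 || stat(argv[1], &st) != 0) {
            perror("stat");
            return 1;
        }
        printf("writable by this process: %s\n",
               may_write(&st, getuid(), getgid()) ? "yes" : "no");
        return 0;
    }
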
*/ + if (!suser(cred, NULL)) + return (0); + + /* Otherwise, check the owner. */ + if ((retval = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, false)) == 0) + return ((cp->c_mode & S_IWUSR) == S_IWUSR ? 0 : EACCES); + + /* Otherwise, check the groups. */ + if (kauth_cred_ismember_gid(cred, cp->c_gid, &is_member) == 0 && is_member) { + return ((cp->c_mode & S_IWGRP) == S_IWGRP ? 0 : EACCES); + } + + /* Otherwise, check everyone else. */ + return ((cp->c_mode & S_IWOTH) == S_IWOTH ? 0 : EACCES); +} + + +/* + * Perform chown operation on cnode cp; + * code must be locked prior to call. + */ +int +#if !QUOTA +hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, __unused kauth_cred_t cred, + __unused struct proc *p) +#else +hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, + __unused struct proc *p) +#endif +{ + register struct cnode *cp = VTOC(vp); + uid_t ouid; + gid_t ogid; +#if QUOTA + int error = 0; + register int i; + int64_t change; +#endif /* QUOTA */ + + if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord) + return (ENOTSUP); + + if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS) + return (0); + + if (uid == (uid_t)VNOVAL) + uid = cp->c_uid; + if (gid == (gid_t)VNOVAL) + gid = cp->c_gid; + +#if 0 /* we are guaranteed that this is already the case */ + /* + * If we don't own the file, are trying to change the owner + * of the file, or are not a member of the target group, + * the caller must be superuser or the call fails. + */ + if ((kauth_cred_getuid(cred) != cp->c_uid || uid != cp->c_uid || + (gid != cp->c_gid && + (kauth_cred_ismember_gid(cred, gid, &is_member) || !is_member))) && + (error = suser(cred, 0))) + return (error); +#endif + + ogid = cp->c_gid; + ouid = cp->c_uid; + + if (ouid == uid && ogid == gid) { + // No change, just set change time + cp->c_touch_chgtime = TRUE; + return 0; + } + +#if QUOTA + if ((error = hfs_getinoquota(cp))) + return (error); + if (ouid == uid) { + dqrele(cp->c_dquot[USRQUOTA]); + cp->c_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(cp->c_dquot[GRPQUOTA]); + cp->c_dquot[GRPQUOTA] = NODQUOT; + } + + /* + * Eventually need to account for (fake) a block per directory + * if (vnode_isdir(vp)) + * change = VTOHFS(vp)->blockSize; + * else + */ + + change = (int64_t)(cp->c_blocks) * (int64_t)VTOVCB(vp)->blockSize; + (void) hfs_chkdq(cp, -change, cred, CHOWN); + (void) hfs_chkiq(cp, -1, cred, CHOWN); + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(cp->c_dquot[i]); + cp->c_dquot[i] = NODQUOT; + } +#endif /* QUOTA */ + cp->c_gid = gid; + cp->c_uid = uid; +#if QUOTA + if ((error = hfs_getinoquota(cp)) == 0) { + if (ouid == uid) { + dqrele(cp->c_dquot[USRQUOTA]); + cp->c_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(cp->c_dquot[GRPQUOTA]); + cp->c_dquot[GRPQUOTA] = NODQUOT; + } + if ((error = hfs_chkdq(cp, change, cred, CHOWN)) == 0) { + if ((error = hfs_chkiq(cp, 1, cred, CHOWN)) == 0) + goto good; + else + (void) hfs_chkdq(cp, -change, cred, CHOWN|FORCE); + } + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(cp->c_dquot[i]); + cp->c_dquot[i] = NODQUOT; + } + } + cp->c_gid = ogid; + cp->c_uid = ouid; + if (hfs_getinoquota(cp) == 0) { + if (ouid == uid) { + dqrele(cp->c_dquot[USRQUOTA]); + cp->c_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(cp->c_dquot[GRPQUOTA]); + cp->c_dquot[GRPQUOTA] = NODQUOT; + } + (void) hfs_chkdq(cp, change, cred, FORCE|CHOWN); + (void) hfs_chkiq(cp, 1, cred, FORCE|CHOWN); + (void) hfs_getinoquota(cp); + } + return (error); +good: + if (hfs_getinoquota(cp)) + 
panic("hfs_chown: lost quota"); +#endif /* QUOTA */ + + /* + * Without quotas, we could probably make this a minor + * modification. + */ + cp->c_flag |= C_MODIFIED; + + /* + According to the SUSv3 Standard, chown() shall mark + for update the st_ctime field of the file. + (No exceptions mentioned) + */ + cp->c_touch_chgtime = TRUE; + return (0); +} + +#if HFS_COMPRESSION +/* + * Flush the resource fork if it exists. vp is the data fork and has + * an iocount. + */ +static int hfs_flush_rsrc(vnode_t vp, vfs_context_t ctx) +{ + cnode_t *cp = VTOC(vp); + + hfs_lock(cp, HFS_SHARED_LOCK, 0); + + vnode_t rvp = cp->c_rsrc_vp; + + if (!rvp) { + hfs_unlock(cp); + return 0; + } + + int vid = vnode_vid(rvp); + + hfs_unlock(cp); + + int error = vnode_getwithvid(rvp, vid); + + if (error) + return error == ENOENT ? 0 : error; + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, 0); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + hfs_filedone(rvp, ctx, HFS_FILE_DONE_NO_SYNC); + hfs_unlock(cp); + hfs_unlock_truncate(cp, 0); + + error = ubc_msync(rvp, 0, ubc_getsize(rvp), NULL, + UBC_PUSHALL | UBC_SYNC); + + vnode_put(rvp); + + return error; +} +#endif // HFS_COMPRESSION + + +/* Helper Functions for exchangedata(2) */ + +/* + * hfs_exchangedata_getxattr + * arguments: + * vp: vnode to extract the EA for + * name_selector: the index into the array of EA name entries. + * buffer: address for output buffer to store the output EA + * NOTE: This function will allocate the buffer, it is the caller's responsibility to free it. + * xattr_size: output argument; will return the size of the EA, to correspond with the buffer. + * + * Return: 0 on success. + * errno on error. If we return any error, the buffer is guaranteed to be NULL. + * + * Assumes CNODE lock held on cnode for 'vp' + */ +static +int hfs_exchangedata_getxattr (struct vnode *vp, uint32_t name_selector, void **buffer, size_t *xattr_size) { + void *xattr_rawdata = NULL; + void *extracted_xattr = NULL; + uio_t uio; + size_t memsize = MAX_EXCHANGE_EA_SIZE; + size_t attrsize; + int error = 0; + struct hfsmount *hfsmp = NULL; + + /* Sanity check inputs */ + if (name_selector > MAX_NUM_XATTR_NAMES) { + return EINVAL; + } + + if (buffer == NULL || xattr_size == NULL) { + return EINVAL; + } + + hfsmp = VTOHFS(vp); + + //allocate 4k memory to hold the EA. We don't use this for "large" EAs, and the default + //EA B-tree size should produce inline attributes of size < 4K + xattr_rawdata = hfs_malloc (MAX_EXCHANGE_EA_SIZE); + if (!xattr_rawdata) { + return ENOMEM; + } + + //now create the UIO + uio = uio_create (1, 0, UIO_SYSSPACE, UIO_READ); + if (!uio) { + hfs_free (xattr_rawdata, memsize); + return ENOMEM; + } + uio_addiov(uio, CAST_USER_ADDR_T(xattr_rawdata), memsize); + attrsize = memsize; + + struct vnop_getxattr_args vga = { + .a_uio = uio, + .a_name = XATTR_NAMES[name_selector], + .a_size = &attrsize + }; + + // this takes care of grabbing the systemfile locks for us. + error = hfs_getxattr_internal (VTOC(vp), &vga, hfsmp, 0); + + if (error) { + /* + * We could have gotten a variety of errors back from the XATTR tree: + * is it too big? (bigger than 4k?) == ERANGE + * was the EA not found? 
== ENOATTR + */ + uio_free(uio); + hfs_free (xattr_rawdata, memsize); + return error; + } + + //free the UIO + uio_free(uio); + + //upon success, a_size/attrsize now contains the actua/exported EA size + extracted_xattr = hfs_malloc (attrsize); + memcpy (extracted_xattr, xattr_rawdata, attrsize); + hfs_free (xattr_rawdata, memsize); + + *xattr_size = attrsize; + *buffer = extracted_xattr; + + return error; +} + + +/* + * hfs_exchangedata_setxattr + * + * Note: This function takes fileIDs in as inputs, because exchangedata does + * swizzly things with the two cnodes (See big block comment in hfs_vnop_exchange) + * so we operate with FileIDs more or less directly on the XATTR b-tree. + * + * arguments: + * hfsmp: the mount we're working on + * fileid: the fileID of the EA to store into the tree. + * name_selector: selector into the EA name array. + * buffer: pointer to the memory of the EA to write. + * xattr_size: size of the EA to write. + * + * Returns 0 on success + * errno on failure + * + * Assumes that a transaction has already begun when this is called + */ + +static +int hfs_exchangedata_setxattr (struct hfsmount *hfsmp, uint32_t fileid, + uint32_t name_selector, void *buffer, size_t xattr_size) { + + int error = 0; + + + /* Sanity check arguments */ + if (name_selector > MAX_NUM_XATTR_NAMES) { + return EINVAL; + } + + if (buffer == NULL || xattr_size == 0 || fileid < kHFSFirstUserCatalogNodeID ) { + return EINVAL; + } + + // is the size too big? + if (xattr_size > hfsmp->hfs_max_inline_attrsize) { + return EINVAL; + } + + /* setup the arguments to setxattr*/ + struct vnop_setxattr_args vsa = { + .a_desc = NULL, + .a_vp = NULL, + .a_name = XATTR_NAMES[name_selector], + .a_uio = NULL, // we use the data_ptr argument to setxattr_internal instead + .a_options = 0, + .a_context = NULL // no context needed, only done from within exchangedata + }; + + /* + * Since we must be in a transaction to guard the exchangedata operation, this will start + * a nested transaction within the exchangedata one. + */ + error = hfs_setxattr_internal (NULL, (caddr_t) buffer, xattr_size, &vsa, hfsmp, fileid); + + return error; + +} + +/* + * hfs_vnop_exchange: + * + * Inputs: + * 'from' vnode/cnode + * 'to' vnode/cnode + * options flag bits + * vfs_context + * + * Discussion: + * hfs_vnop_exchange is used to service the exchangedata(2) system call. + * Per the requirements of that system call, this function "swaps" some + * of the information that lives in one catalog record for some that + * lives in another. Note that not everything is swapped; in particular, + * the extent information stored in each cnode is kept local to that + * cnode. This allows existing file descriptor references to continue + * to operate on the same content, regardless of the location in the + * namespace that the file may have moved to. See inline comments + * in the function for more information. 
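
Since this vnop services the exchangedata(2) system call, the behaviour described above is reachable directly from userspace. A minimal sketch of a caller follows; the paths are placeholders and, on newer systems, the call itself is deprecated, but it is the interface implemented here.

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/attr.h>   /* FSOPT_NOFOLLOW */

    int main(int argc, char *argv[])
    {
        if (argc != 3) {
            fprintf(stderr, "usage: %s file1 file2\n", argv[0]);
            return 2;
        }
        /* Swap the catalog information of the two files while existing
         * file descriptors keep the content they already had open,
         * exactly as the comment above describes. */
        if (exchangedata(argv[1], argv[2], FSOPT_NOFOLLOW) != 0) {
            perror("exchangedata");
            return 1;
        }
        return 0;
    }
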
+ */ +int +hfs_vnop_exchange(struct vnop_exchange_args *ap) +{ + struct vnode *from_vp = ap->a_fvp; + struct vnode *to_vp = ap->a_tvp; + struct cnode *from_cp; + struct cnode *to_cp; + struct hfsmount *hfsmp; + struct cat_desc tempdesc; + struct cat_attr tempattr; + const unsigned char *from_nameptr; + const unsigned char *to_nameptr; + char from_iname[32]; + char to_iname[32]; + uint32_t to_flag_special; + uint32_t from_flag_special; + + uint16_t to_recflags_special; + uint16_t from_recflags_special; + + cnid_t from_parid; + cnid_t to_parid; + int lockflags; + int error = 0, started_tr = 0, got_cookie = 0; + cat_cookie_t cookie; + time_t orig_from_ctime, orig_to_ctime; + bool have_cnode_locks = false, have_from_trunc_lock = false, have_to_trunc_lock = false; + + /* For the quarantine EA */ + void *from_xattr = NULL; + void *to_xattr = NULL; + size_t from_attrsize = 0; + size_t to_attrsize = 0; + + + /* + * VFS does the following checks: + * 1. Validate that both are files. + * 2. Validate that both are on the same mount. + * 3. Validate that they're not the same vnode. + */ + + from_cp = VTOC(from_vp); + to_cp = VTOC(to_vp); + hfsmp = VTOHFS(from_vp); + + orig_from_ctime = from_cp->c_ctime; + orig_to_ctime = to_cp->c_ctime; + +#if CONFIG_PROTECT + /* + * Do not allow exchangedata/F_MOVEDATAEXTENTS on data-protected filesystems + * because the EAs will not be swapped. As a result, the persistent keys would not + * match and the files will be garbage. + */ + if (cp_fs_protected (vnode_mount(from_vp))) { + return EINVAL; + } +#endif + +#if HFS_COMPRESSION + if (!ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) { + if ( hfs_file_is_compressed(from_cp, 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) { + return error; + } + } + + if ( hfs_file_is_compressed(to_cp, 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) { + return error; + } + } + } +#endif // HFS_COMPRESSION + + // Resource forks cannot be exchanged. + if (VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) + return EINVAL; + + /* + * Normally, we want to notify the user handlers about the event, + * except if it's a handler driving the event. + */ + if ((ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) == 0) { + nspace_snapshot_event(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + nspace_snapshot_event(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + } else { + /* + * This is currently used by mtmd so we should tidy up the + * file now because the data won't be used again in the + * destination file. + */ + hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, 0); + hfs_lock_always(from_cp, HFS_EXCLUSIVE_LOCK); + hfs_filedone(from_vp, ap->a_context, HFS_FILE_DONE_NO_SYNC); + hfs_unlock(from_cp); + hfs_unlock_truncate(from_cp, 0); + + // Flush all the data from the source file + error = ubc_msync(from_vp, 0, ubc_getsize(from_vp), NULL, + UBC_PUSHALL | UBC_SYNC); + if (error) + goto exit; + +#if HFS_COMPRESSION + /* + * If this is a compressed file, we need to do the same for + * the resource fork. + */ + if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) { + error = hfs_flush_rsrc(from_vp, ap->a_context); + if (error) + goto exit; + } +#endif + + /* + * We're doing a data-swap so we need to take the truncate + * lock exclusively. We need an exclusive lock because we + * will be completely truncating the source file and we must + * make sure nobody else sneaks in and trys to issue I/O + * whilst we don't have the cnode lock. 
+ * + * After taking the truncate lock we do a quick check to + * verify there are no other references (including mmap + * references), but we must remember that this does not stop + * anybody coming in later and taking a reference. We will + * have the truncate lock exclusively so that will prevent + * them from issuing any I/O. + */ + + if (to_cp < from_cp) { + hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_to_trunc_lock = true; + } + + hfs_lock_truncate(from_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_from_trunc_lock = true; + + /* + * Do an early check to verify the source is not in use by + * anyone. We should be called from an FD opened as F_EVTONLY + * so that doesn't count as a reference. + */ + if (vnode_isinuse(from_vp, 0)) { + error = EBUSY; + goto exit; + } + + if (to_cp >= from_cp) { + hfs_lock_truncate(to_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + have_to_trunc_lock = true; + } + } + + if ((error = hfs_lockpair(from_cp, to_cp, HFS_EXCLUSIVE_LOCK))) + goto exit; + have_cnode_locks = true; + + // Don't allow modification of the journal or journal_info_block + if (hfs_is_journal_file(hfsmp, from_cp) || + hfs_is_journal_file(hfsmp, to_cp)) { + error = EPERM; + goto exit; + } + + /* + * If doing a data move, then call the underlying function. + */ + if (ISSET(ap->a_options, FSOPT_EXCHANGE_DATA_ONLY)) { +#if HFS_COMPRESSION + if (ISSET(from_cp->c_bsdflags, UF_COMPRESSED)) { + error = hfs_move_compressed(from_cp, to_cp); + goto exit; + } +#endif + + error = hfs_move_data(from_cp, to_cp, 0); + goto exit; + } + + /* + * If we're doing a normal exchangedata, then get the source/dst quarantine + * EAs as needed. We do it here before we start the transaction. + */ + + //get the EA for the 'from' vnode if it exists. + error = hfs_exchangedata_getxattr (from_vp, quarantine, &from_xattr, &from_attrsize); + if (error) { + if (error == ENOATTR) { + //it's OK for the quarantine EA to not exist + error = 0; + } + else { + goto exit; + } + } + + + //get the EA from the 'to' vnode if it exists + error = hfs_exchangedata_getxattr (to_vp, quarantine, &to_xattr, &to_attrsize); + if (error) { + if (error == ENOATTR) { + //it's OK for the quarantine EA to not exist + error = 0; + } + else { + goto exit; + } + } + + + /* Start a transaction; we have to do all of this atomically */ + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto exit; + } + started_tr = 1; + + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, vfs_context_proc(ap->a_context)))) { + goto exit; + } + got_cookie = 1; + + /* The backend code always tries to delete the virtual + * extent id for exchanging files so we need to lock + * the extents b-tree. + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Account for the location of the catalog objects. 
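
The to_cp < from_cp comparison earlier in this function is the usual address-ordered locking idiom: when two locks of the same class must both be held, always acquire the lower-addressed object first so that two racing exchanges cannot deadlock against each other. A generic sketch of the pattern follows, with pthread mutexes standing in for the cnode truncate locks; it is not taken from the patch.

    #include <pthread.h>
    #include <stdio.h>

    /* Acquire two peer locks in a globally consistent order: the
     * lower-addressed object first.  Assumes a and b are distinct. */
    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if (a < b) {
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }

    int main(void)
    {
        pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

        /* Regardless of argument order, the locks are taken in the same
         * global (address) order, so callers cannot deadlock each other. */
        lock_pair(&m1, &m2);
        pthread_mutex_unlock(&m2);
        pthread_mutex_unlock(&m1);

        lock_pair(&m2, &m1);
        pthread_mutex_unlock(&m2);
        pthread_mutex_unlock(&m1);

        printf("done\n");
        return 0;
    }
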
*/ + if (from_cp->c_flag & C_HARDLINK) { + MAKE_INODE_NAME(from_iname, sizeof(from_iname), + from_cp->c_attr.ca_linkref); + from_nameptr = (unsigned char *)from_iname; + from_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + from_cp->c_hint = 0; + } else { + from_nameptr = from_cp->c_desc.cd_nameptr; + from_parid = from_cp->c_parentcnid; + } + if (to_cp->c_flag & C_HARDLINK) { + MAKE_INODE_NAME(to_iname, sizeof(to_iname), + to_cp->c_attr.ca_linkref); + to_nameptr = (unsigned char *)to_iname; + to_parid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_cp->c_hint = 0; + } else { + to_nameptr = to_cp->c_desc.cd_nameptr; + to_parid = to_cp->c_parentcnid; + } + + /* + * ExchangeFileIDs swaps the on-disk, or in-BTree extent information + * attached to two different file IDs. It also swaps the extent + * information that may live in the extents-overflow B-Tree. + * + * We do this in a transaction as this may require a lot of B-Tree nodes + * to do completely, particularly if one of the files in question + * has a lot of extents. + * + * For example, assume "file1" has fileID 50, and "file2" has fileID 52. + * For the on-disk records, which are assumed to be synced, we will + * first swap the resident inline-8 extents as part of the catalog records. + * Then we will swap any extents overflow records for each file. + * + * When ExchangeFileIDs returns successfully, "file1" will have fileID 52, + * and "file2" will have fileID 50. However, note that this is only + * approximately half of the work that exchangedata(2) will need to + * accomplish. In other words, we swap "too much" of the information + * because if we only called ExchangeFileIDs, both the fileID and extent + * information would be the invariants of this operation. We don't + * actually want that; we want to conclude with "file1" having + * file ID 50, and "file2" having fileID 52. + * + * The remainder of hfs_vnop_exchange will swap the file ID and other cnode + * data back to the proper ownership, while still allowing the cnode to remain + * pointing at the same set of extents that it did originally. + */ + error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid, + to_parid, from_cp->c_hint, to_cp->c_hint); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error != E_NONE) { + error = MacToVFSError(error); + goto exit; + } + + /* + * Now, we have to swap the quarantine EA. + * + * Ordinarily, we would not have to swap/exchange any extended attributes, + * since they are keyed by the file ID, and this function is supposed + * to manipulate the main data stream/fork only. + * + * However, we want the quarantine EA to follow the file content. + */ + + int from_xattr_status = 0; + if (from_xattr) { + /* + * Caution! + * We've crossed a point of no return here, because if we + * have successfully swapped the file content above, we need to continue here + * to swap the rest of the cnode content, which is not subject to failure. + * Failing the whole function because the xattr swap will result in perceived + * data loss to the caller, so we swallow the error case here. + */ + from_xattr_status = hfs_removexattr_by_id (hfsmp, from_cp->c_fileid, XATTR_NAMES[quarantine]); + if (from_xattr_status == 0) { + int xattr_lockflags; + int remaining_eas; + /* + * Check to see if we need to remove the xattr bit from the catalog record flags while + * 'from_cp' still tracks with its original file ID. 
Once the cnodes' contents are swapped + * and they are ready to be re-hashed, we will OR in the bit if we know that we moved the + * EA to the counterpart. + */ + xattr_lockflags = hfs_systemfile_lock (hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + remaining_eas = file_attribute_exist (hfsmp, from_cp->c_fileid); + if (remaining_eas == 0) { + from_cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask; + //the cnode will be pushed out to disk LATER on. + } + hfs_systemfile_unlock (hfsmp, xattr_lockflags); + + } + } + + //and the same for to_xattr + if (to_xattr) { + int xattr_status = hfs_removexattr_by_id (hfsmp, to_cp->c_fileid, XATTR_NAMES[quarantine]); + + if (xattr_status == 0) { + int xattr_lockflags; + int remaining_eas; + /* + * Check to see if we need to remove the xattr bit from the catalog record flags while + * 'to_cp' still tracks with its original file ID. Once the cnodes' contents are swapped + * and they are ready to be re-hashed, we will OR in the bit if we know that we moved the + * EA to the counterpart. + */ + xattr_lockflags = hfs_systemfile_lock (hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + remaining_eas = file_attribute_exist (hfsmp, from_cp->c_fileid); + if (remaining_eas == 0) { + to_cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask; + //the cnode will be pushed out to disk LATER on. + } + hfs_systemfile_unlock (hfsmp, xattr_lockflags); + + /* Now move the EA to the counterparty fileID. We piggyback on the larger transaction here */ + hfs_exchangedata_setxattr (hfsmp, from_cp->c_fileid, quarantine, to_xattr, to_attrsize); + } + } + + if (from_xattr && from_xattr_status == 0) { + /* + * if the from EA got removed properly, then attach it to the 'to' file. We do it at this point + * to ensure that it got removed properly above before re-setting it again. + */ + hfs_exchangedata_setxattr (hfsmp, to_cp->c_fileid, quarantine, from_xattr, from_attrsize); + } + + + /* Purge the vnodes from the name cache */ + if (from_vp) + cache_purge(from_vp); + if (to_vp) + cache_purge(to_vp); + + /* Bump both source and destination write counts before any swaps. */ + { + hfs_incr_gencount (from_cp); + hfs_incr_gencount (to_cp); + } + + /* Save a copy of "from" attributes before swapping. */ + bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc)); + bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr)); + + /* Save whether or not each cnode is a hardlink or has EAs */ + from_flag_special = from_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + from_recflags_special = (from_cp->c_attr.ca_recflags & kHFSHasAttributesMask); + + to_flag_special = to_cp->c_flag & (C_HARDLINK | C_HASXATTRS); + to_recflags_special = (to_cp->c_attr.ca_recflags & kHFSHasAttributesMask); + + /* Drop the special bits from each cnode */ + from_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); + to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); + from_cp->c_attr.ca_recflags &= ~(kHFSHasAttributesMask); + to_cp->c_attr.ca_recflags &= ~(kHFSHasAttributesMask); + + /* + * Now complete the in-memory portion of the copy. + * + * ExchangeFileIDs swaps the on-disk records involved. We complete the + * operation by swapping the in-memory contents of the two files here. + * We swap the cnode descriptors, which contain name, BSD attributes, + * timestamps, etc, about the file. + * + * NOTE: We do *NOT* swap the fileforks of the two cnodes. We have + * already swapped the on-disk extent information. 
As long as we swap the + * IDs, the in-line resident 8 extents that live in the filefork data + * structure will point to the right data for the new file ID if we leave + * them alone. + * + * As a result, any file descriptor that points to a particular + * vnode (even though it should change names), will continue + * to point to the same content. + */ + + /* Copy the "to" -> "from" cnode */ + bcopy(&to_cp->c_desc, &from_cp->c_desc, sizeof(struct cat_desc)); + + from_cp->c_hint = 0; + /* + * If 'to' was a hardlink, then we copied over its link ID/CNID/(namespace ID) + * when we bcopy'd the descriptor above. However, the cnode attributes + * are not bcopied. As a result, make sure to swap the file IDs of each item. + * + * Further, other hardlink attributes must be moved along in this swap: + * the linkcount, the linkref, and the firstlink all need to move + * along with the file IDs. See note below regarding the flags and + * what moves vs. what does not. + * + * For Reference: + * linkcount == total # of hardlinks. + * linkref == the indirect inode pointer. + * firstlink == the first hardlink in the chain (written to the raw inode). + * These three are tied to the fileID and must move along with the rest of the data. + */ + from_cp->c_fileid = to_cp->c_attr.ca_fileid; + + from_cp->c_itime = to_cp->c_itime; + from_cp->c_btime = to_cp->c_btime; + from_cp->c_atime = to_cp->c_atime; + from_cp->c_ctime = to_cp->c_ctime; + from_cp->c_gid = to_cp->c_gid; + from_cp->c_uid = to_cp->c_uid; + from_cp->c_bsdflags = to_cp->c_bsdflags; + from_cp->c_mode = to_cp->c_mode; + from_cp->c_linkcount = to_cp->c_linkcount; + from_cp->c_attr.ca_linkref = to_cp->c_attr.ca_linkref; + from_cp->c_attr.ca_firstlink = to_cp->c_attr.ca_firstlink; + + /* + * The cnode flags need to stay with the cnode and not get transferred + * over along with everything else because they describe the content; they are + * not attributes that reflect changes specific to the file ID. In general, + * fields that are tied to the file ID are the ones that will move. + * + * This reflects the fact that the file may have borrowed blocks, dirty metadata, + * or other extents, which may not yet have been written to the catalog. If + * they were, they would have been transferred above in the ExchangeFileIDs call above... + * + * The flags that are special are: + * C_HARDLINK, C_HASXATTRS + * + * and the c_attr recflag: + * kHFSHasAttributesMask + * + * These flags move with the item and file ID in the namespace since their + * state is tied to that of the file ID. + * + * So to transfer the flags, we have to take the following steps + * 1) Store in a localvar whether or not the special bits are set. + * 2) Drop the special bits from the current flags + * 3) swap the special flag bits to their destination + */ + from_cp->c_flag |= to_flag_special | C_MODIFIED; + from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags; + from_cp->c_attr.ca_recflags |= to_recflags_special; + if (from_xattr) { + /* + * NOTE: + * This is counter-intuitive and part of the complexity of exchangedata. + * if 'from_cp' originally had a quarantine EA, then ensure that the cnode + * pointed to by 'from_cp' CONTINUES to keep the "has EAs" bit. This is because + * the cnode is about to be re-hashed with a new ID, but the file CONTENT + * (i.e. the file fork) stayed put. And we want the quarantine EA to follow + * the content. The check above is correct. 
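
The quarantine attribute being carried across the swap is an ordinary extended attribute, so its movement can be observed from userspace after the exchange. A small sketch using getxattr(2) follows; the attribute name shown is the usual quarantine key, but treat it as an example rather than a statement of what XATTR_NAMES[quarantine] actually contains.

    #include <stdio.h>
    #include <sys/xattr.h>

    int main(int argc, char *argv[])
    {
        char buf[4096];   /* the EA is expected to be small (inline, < 4 KiB) */
        ssize_t len;

        /* After exchangedata(2), the quarantine EA should be found on the
         * file whose *content* it originally described, not on the file
         * that merely kept the old name. */
        len = getxattr(argc > 1 ? argv[1] : "/tmp/example",
                       "com.apple.quarantine", buf, sizeof(buf), 0, 0);
        if (len < 0) {
            perror("getxattr");
            return 1;
        }
        printf("quarantine EA is %zd bytes\n", len);
        return 0;
    }
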
+ */ + from_cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + } + + bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32); + + + /* Copy the "from" -> "to" cnode */ + bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc)); + to_cp->c_hint = 0; + /* + * Pull the file ID from the tempattr we copied above. We can't assume + * it is the same as the CNID. + */ + to_cp->c_fileid = tempattr.ca_fileid; + to_cp->c_itime = tempattr.ca_itime; + to_cp->c_btime = tempattr.ca_btime; + to_cp->c_atime = tempattr.ca_atime; + to_cp->c_ctime = tempattr.ca_ctime; + to_cp->c_gid = tempattr.ca_gid; + to_cp->c_uid = tempattr.ca_uid; + to_cp->c_bsdflags = tempattr.ca_flags; + to_cp->c_mode = tempattr.ca_mode; + to_cp->c_linkcount = tempattr.ca_linkcount; + to_cp->c_attr.ca_linkref = tempattr.ca_linkref; + to_cp->c_attr.ca_firstlink = tempattr.ca_firstlink; + + /* + * Only OR in the "from" flags into our cnode flags below. + * Leave the rest of the flags alone. + */ + to_cp->c_flag |= from_flag_special | C_MODIFIED; + to_cp->c_attr.ca_recflags = tempattr.ca_recflags; + to_cp->c_attr.ca_recflags |= from_recflags_special; + + if (to_xattr) { + /* + * NOTE: + * This is counter-intuitive and part of the complexity of exchangedata. + * if 'to_cp' originally had a quarantine EA, then ensure that the cnode + * pointed to by 'to_cp' CONTINUES to keep the "has EAs" bit. This is because + * the cnode is about to be re-hashed with a new ID, but the file CONTENT + * (i.e. the file fork) stayed put. And we want the quarantine EA to follow + * the content. The check above is correct. + */ + to_cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + } + + bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); + + + /* Rehash the cnodes using their new file IDs */ + hfs_chash_rehash(hfsmp, from_cp, to_cp); + + /* + * When a file moves out of "Cleanup At Startup" + * we can drop its NODUMP status. + */ + if ((from_cp->c_bsdflags & UF_NODUMP) && + (from_cp->c_parentcnid != to_cp->c_parentcnid)) { + from_cp->c_bsdflags &= ~UF_NODUMP; + from_cp->c_touch_chgtime = TRUE; + } + if ((to_cp->c_bsdflags & UF_NODUMP) && + (to_cp->c_parentcnid != from_cp->c_parentcnid)) { + to_cp->c_bsdflags &= ~UF_NODUMP; + to_cp->c_touch_chgtime = TRUE; + } + +exit: + if (got_cookie) { + cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); + } + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + if (have_cnode_locks) + hfs_unlockpair(from_cp, to_cp); + + if (have_from_trunc_lock) + hfs_unlock_truncate(from_cp, 0); + + if (have_to_trunc_lock) + hfs_unlock_truncate(to_cp, 0); + + /* Free the memory used by the EAs */ + if (from_xattr) { + hfs_free (from_xattr, from_attrsize); + from_xattr = NULL; + } + + if (to_xattr) { + hfs_free (to_xattr, to_attrsize); + to_xattr = NULL; + } + + return (error); +} + +#if HFS_COMPRESSION +/* + * This function is used specifically for the case when a namespace + * handler is trying to steal data before it's deleted. Note that we + * don't bother deleting the xattr from the source because it will get + * deleted a short time later anyway. + * + * cnodes must be locked + */ +static int hfs_move_compressed(cnode_t *from_cp, cnode_t *to_cp) +{ + int ret; + void *data = NULL; + + CLR(from_cp->c_bsdflags, UF_COMPRESSED); + SET(from_cp->c_flag, C_MODIFIED); + + ret = hfs_move_data(from_cp, to_cp, HFS_MOVE_DATA_INCLUDE_RSRC); + if (ret) + goto exit; + + /* + * Transfer the xattr that decmpfs uses. 
Ideally, this code + * should be with the other decmpfs code but it's file system + * agnostic and this path is currently, and likely to remain, HFS+ + * specific. It's easier and more performant if we implement it + * here. + */ + + size_t size; + data = hfs_malloc(size = MAX_DECMPFS_XATTR_SIZE); + + ret = hfs_xattr_read(from_cp->c_vp, DECMPFS_XATTR_NAME, data, &size); + if (ret) + goto exit; + + ret = hfs_xattr_write(to_cp->c_vp, DECMPFS_XATTR_NAME, data, size); + if (ret) + goto exit; + + SET(to_cp->c_bsdflags, UF_COMPRESSED); + SET(to_cp->c_flag, C_MODIFIED); + +exit: + hfs_free(data, MAX_DECMPFS_XATTR_SIZE); + + return ret; +} +#endif // HFS_COMPRESSION + +int +hfs_vnop_mmap(struct vnop_mmap_args *ap) +{ + struct vnode *vp = ap->a_vp; + cnode_t *cp = VTOC(vp); + int error; + + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(cp, 1); /* 1 == don't take the cnode lock */ + time_t orig_ctime = cp->c_ctime; + + if (!compressed && (cp->c_bsdflags & UF_COMPRESSED)) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error != 0) { + return error; + } + } + + if (ap->a_fflags & PROT_WRITE) { + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + } + } + +#if CONFIG_PROTECT + error = cp_handle_vnop(vp, (ap->a_fflags & PROT_WRITE + ? CP_WRITE_ACCESS : 0) | CP_READ_ACCESS, 0); + if (error) + return error; +#endif + + // + // NOTE: we return ENOTSUP because we want the cluster layer + // to actually do all the real work. + // + return (ENOTSUP); +} + +static errno_t hfs_vnop_mnomap(struct vnop_mnomap_args *ap) +{ + vnode_t vp = ap->a_vp; + + /* + * Whilst the file was mapped, there may not have been any + * page-outs so we need to increment the generation counter now. + * Unfortunately this may lead to a change in the generation + * counter when no actual change has been made, but there is + * little we can do about that with our current architecture. + */ + if (ubc_is_mapped_writable(vp)) { + cnode_t *cp = VTOC(vp); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + hfs_incr_gencount(cp); + + /* + * We don't want to set the modification time here since a + * change to that is not acceptable if no changes were made. + * Instead we set a flag so that if we get any page-outs we + * know to update the modification time. It's possible that + * they weren't actually because of changes made whilst the + * file was mapped but that's not easy to fix now. + */ + SET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING); + + hfs_unlock(cp); + } + + return 0; +} + +/* + * Mark the resource fork as needing a ubc_setsize when we drop the + * cnode lock later. + */ +static void hfs_rsrc_setsize(cnode_t *cp) +{ + /* + * We need to take an iocount if we don't have one. vnode_get + * will return ENOENT if the vnode is terminating which is what we + * want as it's not safe to call ubc_setsize in that case. + */ + if (cp->c_rsrc_vp && !vnode_get(cp->c_rsrc_vp)) { + // Shouldn't happen, but better safe... + if (ISSET(cp->c_flag, C_NEED_RVNODE_PUT)) + vnode_put(cp->c_rsrc_vp); + SET(cp->c_flag, C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE); + } +} + +/* + * hfs_move_data + * + * This is a non-symmetric variant of exchangedata. In this function, + * the contents of the data fork (and optionally the resource fork) + * are moved from from_cp to to_cp. + * + * The cnodes must be locked. + * + * The cnode pointed to by 'to_cp' *must* be empty prior to invoking + * this function. 
We impose this restriction because we may not be + * able to fully delete the entire file's contents in a single + * transaction, particularly if it has a lot of extents. In the + * normal file deletion codepath, the file is screened for two + * conditions: 1) bigger than 400MB, and 2) more than 8 extents. If + * so, the file is relocated to the hidden directory and the deletion + * is broken up into multiple truncates. We can't do that here + * because both files need to exist in the namespace. The main reason + * this is imposed is that we may have to touch a whole lot of bitmap + * blocks if there are many extents. + * + * Any data written to 'from_cp' after this call completes is not + * guaranteed to be moved. + * + * Arguments: + * cnode_t *from_cp : source file + * cnode_t *to_cp : destination file; must be empty + * + * Returns: + * + * EBUSY - File has been deleted or is in use + * EFBIG - Destination file was not empty + * EIO - An I/O error + * 0 - success + * other - Other errors that can be returned from called functions + */ +int hfs_move_data(cnode_t *from_cp, cnode_t *to_cp, + hfs_move_data_options_t options) +{ + hfsmount_t *hfsmp = VTOHFS(from_cp->c_vp); + int error = 0; + int lockflags = 0; + bool return_EIO_on_error = false; + const bool include_rsrc = ISSET(options, HFS_MOVE_DATA_INCLUDE_RSRC); + + /* Verify that neither source/dest file is open-unlinked */ + if (ISSET(from_cp->c_flag, C_DELETED | C_NOEXISTS) + || ISSET(to_cp->c_flag, C_DELETED | C_NOEXISTS)) { + return EBUSY; + } + + /* + * Verify the source file is not in use by anyone besides us. + * + * This function is typically invoked by a namespace handler + * process responding to a temporarily stalled system call. + * The FD that it is working off of is opened O_EVTONLY, so + * it really has no active usecounts (the kusecount from O_EVTONLY + * is subtracted from the total usecounts). + * + * As a result, we shouldn't have any active usecounts against + * this vnode when we go to check it below. + */ + if (vnode_isinuse(from_cp->c_vp, 0)) + return EBUSY; + + if (include_rsrc && from_cp->c_rsrc_vp) { + if (vnode_isinuse(from_cp->c_rsrc_vp, 0)) + return EBUSY; + + /* + * In the code below, if the destination file doesn't have a + * c_rsrcfork then we don't create it which means we we cannot + * transfer the ff_invalidranges and cf_vblocks fields. These + * shouldn't be set because we flush the resource fork before + * calling this function but there is a tiny window when we + * did not have any locks... + */ + if (!to_cp->c_rsrcfork + && (!TAILQ_EMPTY(&from_cp->c_rsrcfork->ff_invalidranges) + || from_cp->c_rsrcfork->ff_unallocblocks)) { + /* + * The file isn't really busy now but something did slip + * in and tinker with the file while we didn't have any + * locks, so this is the most meaningful return code for + * the caller. + */ + return EBUSY; + } + } + + // Check the destination file is empty + if (to_cp->c_datafork->ff_blocks + || to_cp->c_datafork->ff_size + || (include_rsrc + && (to_cp->c_blocks + || (to_cp->c_rsrcfork && to_cp->c_rsrcfork->ff_size)))) { + return EFBIG; + } + + if ((error = hfs_start_transaction (hfsmp))) + return error; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, + HFS_EXCLUSIVE_LOCK); + + // filefork_t is 128 bytes which should be OK + filefork_t rfork_buf, *from_rfork = NULL; + + if (include_rsrc) { + from_rfork = from_cp->c_rsrcfork; + + /* + * Creating resource fork vnodes is expensive, so just get get + * the fork data if we need it. 
+ */ + if (!from_rfork && hfs_has_rsrc(from_cp)) { + from_rfork = &rfork_buf; + + from_rfork->ff_cp = from_cp; + TAILQ_INIT(&from_rfork->ff_invalidranges); + + error = cat_idlookup(hfsmp, from_cp->c_fileid, 0, 1, NULL, NULL, + &from_rfork->ff_data); + + if (error) + goto exit; + } + } + + /* + * From here on, any failures mean that we might be leaving things + * in a weird or inconsistent state. Ideally, we should back out + * all the changes, but to do that properly we need to fix + * MoveData. We'll save fixing that for another time. For now, + * just return EIO in all cases to the caller so that they know. + */ + return_EIO_on_error = true; + + bool data_overflow_extents = overflow_extents(from_cp->c_datafork); + + // Move the data fork + if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, + to_cp->c_datafork, to_cp))) { + goto exit; + } + + SET(from_cp->c_flag, C_NEED_DATA_SETSIZE); + SET(to_cp->c_flag, C_NEED_DATA_SETSIZE); + + // We move the resource fork later + + /* + * Note that because all we're doing is moving the extents around, + * we can probably do this in a single transaction: Each extent + * record (group of 8) is 64 bytes. A extent overflow B-Tree node + * is typically 4k. This means each node can hold roughly ~60 + * extent records == (480 extents). + * + * If a file was massively fragmented and had 20k extents, this + * means we'd roughly touch 20k/480 == 41 to 42 nodes, plus the + * index nodes, for half of the operation. (inserting or + * deleting). So if we're manipulating 80-100 nodes, this is + * basically 320k of data to write to the journal in a bad case. + */ + if (data_overflow_extents) { + if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0))) + goto exit; + } + + if (from_rfork && overflow_extents(from_rfork)) { + if ((error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1))) + goto exit; + } + + // Touch times + from_cp->c_touch_acctime = TRUE; + from_cp->c_touch_chgtime = TRUE; + from_cp->c_touch_modtime = TRUE; + hfs_touchtimes(hfsmp, from_cp); + + to_cp->c_touch_acctime = TRUE; + to_cp->c_touch_chgtime = TRUE; + to_cp->c_touch_modtime = TRUE; + hfs_touchtimes(hfsmp, to_cp); + + struct cat_fork dfork_buf; + const struct cat_fork *dfork, *rfork; + + dfork = hfs_prepare_fork_for_update(to_cp->c_datafork, NULL, + &dfork_buf, hfsmp->blockSize); + rfork = hfs_prepare_fork_for_update(from_rfork, NULL, + &rfork_buf.ff_data, hfsmp->blockSize); + + // Update the catalog nodes, to_cp first + if ((error = cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, + dfork, rfork))) { + goto exit; + } + + CLR(to_cp->c_flag, C_MODIFIED | C_MINOR_MOD); + + // Update in-memory resource fork data here + if (from_rfork) { + // Update c_blocks + uint32_t moving = from_rfork->ff_blocks + from_rfork->ff_unallocblocks; + + from_cp->c_blocks -= moving; + to_cp->c_blocks += moving; + + // Update to_cp's resource data if it has it + filefork_t *to_rfork = to_cp->c_rsrcfork; + if (to_rfork) { + TAILQ_SWAP(&to_rfork->ff_invalidranges, + &from_rfork->ff_invalidranges, rl_entry, rl_link); + to_rfork->ff_data = from_rfork->ff_data; + + // Deal with ubc_setsize + hfs_rsrc_setsize(to_cp); + } + + // Wipe out the resource fork in from_cp + rl_init(&from_rfork->ff_invalidranges); + bzero(&from_rfork->ff_data, sizeof(from_rfork->ff_data)); + + // Deal with ubc_setsize + hfs_rsrc_setsize(from_cp); + } + + // Currently unnecessary, but might be useful in future... 
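
A quick back-of-envelope check of the transaction-size estimate in the single-transaction comment above, using the comment's own per-node figures (roughly 60 extent records of 8 extents each per 4 KiB overflow node); index-node traffic is ignored, so the totals are approximate.

    #include <stdio.h>

    int main(void)
    {
        int extents_per_node = 60 * 8;                 /* ~480, as quoted above */
        int frag_extents     = 20000;                  /* the "massively fragmented" example */
        int leaf_nodes       = (frag_extents + extents_per_node - 1) / extents_per_node;
        int touched_nodes    = 2 * leaf_nodes;         /* delete from one file, insert into the other */

        /* Prints roughly 42 leaf nodes per side, ~84 touched overall,
         * ~336 KiB of node data, matching the 80-100 node / ~320k estimate. */
        printf("~%d leaf nodes per side, ~%d touched overall, ~%d KiB of node data\n",
               leaf_nodes, touched_nodes, touched_nodes * 4);
        return 0;
    }
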
+ dfork = hfs_prepare_fork_for_update(from_cp->c_datafork, NULL, &dfork_buf, + hfsmp->blockSize); + rfork = hfs_prepare_fork_for_update(from_rfork, NULL, &rfork_buf.ff_data, + hfsmp->blockSize); + + // Update from_cp + if ((error = cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, + dfork, rfork))) { + goto exit; + } + + CLR(from_cp->c_flag, C_MODIFIED | C_MINOR_MOD); + +exit: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + } + + if (error && error != EIO && return_EIO_on_error) { + printf("hfs_move_data: encountered error %d\n", error); + error = EIO; + } + + return error; +} + +/* + * Move all of the catalog and runtime data in srcfork to dstfork. + * + * This allows us to maintain the invalid ranges across the move data + * operation so we don't need to force all of the pending IO right + * now. In addition, we move all non overflow-extent extents into the + * destination here. + * + * The destination fork must be empty and should have been checked + * prior to calling this. + */ +static int hfs_move_fork(filefork_t *srcfork, cnode_t *src_cp, + filefork_t *dstfork, cnode_t *dst_cp) +{ + // Move the invalid ranges + TAILQ_SWAP(&dstfork->ff_invalidranges, &srcfork->ff_invalidranges, + rl_entry, rl_link); + rl_remove_all(&srcfork->ff_invalidranges); + + // Move the fork data (copy whole structure) + dstfork->ff_data = srcfork->ff_data; + bzero(&srcfork->ff_data, sizeof(srcfork->ff_data)); + + // Update c_blocks + src_cp->c_blocks -= dstfork->ff_blocks + dstfork->ff_unallocblocks; + dst_cp->c_blocks += dstfork->ff_blocks + dstfork->ff_unallocblocks; + + return 0; +} + +/* + * cnode must be locked + */ +int +hfs_fsync(struct vnode *vp, int waitfor, hfs_fsync_mode_t fsyncmode, struct proc *p) +{ + struct cnode *cp = VTOC(vp); + struct filefork *fp = NULL; + int retval = 0; + struct hfsmount *hfsmp = VTOHFS(vp); + struct timeval tv; + int waitdata; /* attributes necessary for data retrieval */ + int wait; /* all other attributes (e.g. atime, etc.) */ + int took_trunc_lock = 0; + int fsync_default = 1; + + /* + * Applications which only care about data integrity rather than full + * file integrity may opt out of (delay) expensive metadata update + * operations as a performance optimization. + */ + wait = (waitfor == MNT_WAIT); + waitdata = (waitfor == MNT_DWAIT) | wait; + + if (always_do_fullfsync) + fsyncmode = HFS_FSYNC_FULL; + if (fsyncmode != HFS_FSYNC) + fsync_default = 0; + + /* HFS directories don't have any data blocks. */ + if (vnode_isdir(vp)) + goto metasync; + fp = VTOF(vp); + + /* + * For system files flush the B-tree header and + * for regular files write out any clusters + */ + if (vnode_issystem(vp)) { + if (VTOF(vp)->fcbBTCBPtr != NULL) { + // XXXdbg + if (hfsmp->jnl == NULL) { + BTFlushPath(VTOF(vp)); + } + } + } else { + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + took_trunc_lock = 1; + + if (fp->ff_unallocblocks != 0) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + + /* Don't hold cnode lock when calling into cluster layer. */ + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); + + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + /* + * When MNT_WAIT is requested and the zero fill timeout + * has expired then we must explicitly zero out any areas + * that are currently marked invalid (holes). + * + * Files with NODUMP can bypass zero filling here. 
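+	 * (Roughly: a waited-for fsync on a regular, non-system file whose
+	 * zero-fill timeout has passed, or any file marked C_ALWAYS_ZEROFILL with
+	 * pending invalid ranges, gets those ranges zeroed and pushed below.)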
+ */ + if (fp && (((cp->c_flag & C_ALWAYS_ZEROFILL) && !TAILQ_EMPTY(&fp->ff_invalidranges)) || + ((wait || (cp->c_flag & C_ZFWANTSYNC)) && + ((cp->c_bsdflags & UF_NODUMP) == 0) && + (vnode_issystem(vp) ==0) && + cp->c_zftimeout != 0))) { + + microuptime(&tv); + if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && fsync_default && tv.tv_sec < (long)cp->c_zftimeout) { + /* Remember that a force sync was requested. */ + cp->c_flag |= C_ZFWANTSYNC; + goto datasync; + } + if (!TAILQ_EMPTY(&fp->ff_invalidranges)) { + if (!took_trunc_lock || (cp->c_truncatelockowner == HFS_SHARED_OWNER)) { + hfs_unlock(cp); + if (took_trunc_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + took_trunc_lock = 1; + } + hfs_flush_invalid_ranges(vp); + hfs_unlock(cp); + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + } + } +datasync: + if (took_trunc_lock) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + took_trunc_lock = 0; + } + + if (!hfsmp->jnl) + buf_flushdirtyblks(vp, waitdata, 0, "hfs_fsync"); + else if (fsync_default && vnode_islnk(vp) + && vnode_hasdirtyblks(vp) && vnode_isrecycled(vp)) { + /* + * If it's a symlink that's dirty and is about to be recycled, + * we need to flush the journal. + */ + fsync_default = 0; + } + +metasync: + if (vnode_isreg(vp) && vnode_issystem(vp)) { + if (VTOF(vp)->fcbBTCBPtr != NULL) { + microuptime(&tv); + BTSetLastSync(VTOF(vp), tv.tv_sec); + } + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; + } else if (!vnode_isswap(vp)) { + retval = hfs_update(vp, HFS_UPDATE_FORCE); + + /* + * When MNT_WAIT is requested push out the catalog record for + * this file. If they asked for a full fsync, we can skip this + * because the journal_flush or hfs_metasync_all will push out + * all of the metadata changes. + */ + if ((retval == 0) && wait && fsync_default && cp->c_hint && + !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) { + hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p); + } + + /* + * If this was a full fsync, make sure all metadata + * changes get to stable storage. + */ + if (!fsync_default) { + if (hfsmp->jnl) { + if (fsyncmode == HFS_FSYNC_FULL) + hfs_flush(hfsmp, HFS_FLUSH_FULL); + else + hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_BARRIER); + } else { + retval = hfs_metasync_all(hfsmp); + /* XXX need to pass context! */ + hfs_flush(hfsmp, HFS_FLUSH_CACHE); + } + } + } + + if (!hfs_is_dirty(cp) && !ISSET(cp->c_flag, C_DELETED)) + vnode_cleardirty(vp); + + return (retval); +} + + +/* Sync an hfs catalog b-tree node */ +int +hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p) +{ + vnode_t vp; + buf_t bp; + int lockflags; + + vp = HFSTOVCB(hfsmp)->catalogRefNum; + + // XXXdbg - don't need to do this on a journaled volume + if (hfsmp->jnl) { + return 0; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + /* + * Look for a matching node that has been delayed + * but is not part of a set (B_LOCKED). + * + * BLK_ONLYVALID causes buf_getblk to return a + * buf_t for the daddr64_t specified only if it's + * currently resident in the cache... 
the size + * parameter to buf_getblk is ignored when this flag + * is set + */ + bp = buf_getblk(vp, node, 0, 0, 0, BLK_META | BLK_ONLYVALID); + + if (bp) { + if ((buf_flags(bp) & (B_LOCKED | B_DELWRI)) == B_DELWRI) + (void) VNOP_BWRITE(bp); + else + buf_brelse(bp); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + return (0); +} + + +/* + * Sync all hfs B-trees. Use this instead of journal_flush for a volume + * without a journal. Note that the volume bitmap does not get written; + * we rely on fsck_hfs to fix that up (which it can do without any loss + * of data). + */ +int +hfs_metasync_all(struct hfsmount *hfsmp) +{ + int lockflags; + + /* Lock all of the B-trees so we get a mutually consistent state */ + lockflags = hfs_systemfile_lock(hfsmp, + SFL_CATALOG|SFL_EXTENTS|SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Sync each of the B-trees */ + if (hfsmp->hfs_catalog_vp) + hfs_btsync(hfsmp->hfs_catalog_vp, 0); + if (hfsmp->hfs_extents_vp) + hfs_btsync(hfsmp->hfs_extents_vp, 0); + if (hfsmp->hfs_attribute_vp) + hfs_btsync(hfsmp->hfs_attribute_vp, 0); + + /* Wait for all of the writes to complete */ + if (hfsmp->hfs_catalog_vp) + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_metasync_all"); + if (hfsmp->hfs_extents_vp) + vnode_waitforwrites(hfsmp->hfs_extents_vp, 0, 0, 0, "hfs_metasync_all"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs_metasync_all"); + + hfs_systemfile_unlock(hfsmp, lockflags); + + return 0; +} + + +/*ARGSUSED 1*/ +static int +hfs_btsync_callback(struct buf *bp, __unused void *dummy) +{ + buf_clearflags(bp, B_LOCKED); + (void) buf_bawrite(bp); + + return(BUF_CLAIMED); +} + + +int +hfs_btsync(struct vnode *vp, int sync_transaction) +{ + struct cnode *cp = VTOC(vp); + struct timeval tv; + int flags = 0; + + if (sync_transaction) + flags |= BUF_SKIP_NONLOCKED; + /* + * Flush all dirty buffers associated with b-tree. + */ + buf_iterate(vp, hfs_btsync_callback, flags, 0); + + microuptime(&tv); + if (vnode_issystem(vp) && (VTOF(vp)->fcbBTCBPtr != NULL)) + (void) BTSetLastSync(VTOF(vp), tv.tv_sec); + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; + + return 0; +} + +/* + * Remove a directory. 
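+ * (VNOP entry point: it validates vp and dvp, takes both cnode locks,
+ * assigns a document_id to tracked directories that lack one, and then
+ * hands the real work to hfs_removedir().)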
+ */ +int +hfs_vnop_rmdir(struct vnop_rmdir_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct cnode *dcp = VTOC(dvp); + struct cnode *cp = VTOC(vp); + int error; + time_t orig_ctime; + + orig_ctime = VTOC(vp)->c_ctime; + + if (!S_ISDIR(cp->c_mode)) { + return (ENOTDIR); + } + if (dvp == vp) { + return (EINVAL); + } + + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + cp = VTOC(vp); + + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { + return (error); + } + + /* Check for a race with rmdir on the parent directory */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + hfs_unlockpair (dcp, cp); + return ENOENT; + } + + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + if ((cp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockpair(dcp, cp); + + if (hfs_generate_document_id(VTOHFS(vp), &newid) == 0) { + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id = newid; + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, VTOHFS(vp)->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rm... + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + } + } + + error = hfs_removedir(dvp, vp, ap->a_cnp, 0, 0); + + hfs_unlockpair(dcp, cp); + + return (error); +} + +/* + * Remove a directory + * + * Both dvp and vp cnodes are locked + */ +int +hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, + int skip_reserve, int only_unlink) +{ + struct cnode *cp; + struct cnode *dcp; + struct hfsmount * hfsmp; + struct cat_desc desc; + int lockflags; + int error = 0, started_tr = 0; + + cp = VTOC(vp); + dcp = VTOC(dvp); + hfsmp = VTOHFS(vp); + + if (dcp == cp) { + return (EINVAL); /* cannot remove "." */ + } + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + return (0); + } + if (cp->c_entries != 0) { + return (ENOTEMPTY); + } + + /* + * If the directory is open or in use (e.g. opendir() or current working + * directory for some process); wait for inactive/reclaim to actually + * remove cnode from the catalog. Both inactive and reclaim codepaths are capable + * of removing open-unlinked directories from the catalog, as well as getting rid + * of EAs still on the element. So change only_unlink to true, so that it will get + * cleaned up below. + * + * Otherwise, we can get into a weird old mess where the directory has C_DELETED, + * but it really means C_NOEXISTS because the item was actually removed from the + * catalog. Then when we try to remove the entry from the catalog later on, it won't + * really be there anymore. + */ + if (vnode_isinuse(vp, 0)) { + only_unlink = 1; + } + + /* Deal with directory hardlinks */ + if (cp->c_flag & C_HARDLINK) { + /* + * Note that if we have a directory which was a hardlink at any point, + * its actual directory data is stored in the directory inode in the hidden + * directory rather than the leaf element(s) present in the namespace. + * + * If there are still other hardlinks to this directory, + * then we'll just eliminate this particular link and the vnode will still exist. 
+ * If this is the last link to an empty directory, then we'll open-unlink the + * directory and it will be only tagged with C_DELETED (as opposed to C_NOEXISTS). + * + * We could also return EBUSY here. + */ + + return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); + } + + /* + * In a few cases, we may want to allow the directory to persist in an + * open-unlinked state. If the directory is being open-unlinked (still has usecount + * references), or if it has EAs, or if it was being deleted as part of a rename, + * then we go ahead and move it to the hidden directory. + * + * If the directory is being open-unlinked, then we want to keep the catalog entry + * alive so that future EA calls and fchmod/fstat etc. do not cause issues later. + * + * If the directory had EAs, then we want to use the open-unlink trick so that the + * EA removal is not done in one giant transaction. Otherwise, it could cause a panic + * due to overflowing the journal. + * + * Finally, if it was deleted as part of a rename, we move it to the hidden directory + * in order to maintain rename atomicity. + * + * Note that the allow_dirs argument to hfs_removefile specifies that it is + * supposed to handle directories for this case. + */ + + if (((hfsmp->hfs_attribute_vp != NULL) && + ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0)) || + (only_unlink != 0)) { + + int ret = hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL, only_unlink); + /* + * Even though hfs_vnop_rename calls vnode_recycle for us on tvp we call + * it here just in case we were invoked by rmdir() on a directory that had + * EAs. To ensure that we start reclaiming the space as soon as possible, + * we call vnode_recycle on the directory. + */ + vnode_recycle(vp); + + return ret; + + } + + dcp->c_flag |= C_DIR_MODIFICATION; + +#if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_getinoquota(cp); +#endif + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; + } + started_tr = 1; + + /* + * Verify the directory is empty (and valid). + * (Rmdir ".." won't be valid since + * ".." will contain a reference to + * the current directory and thus be + * non-empty.) + */ + if ((dcp->c_bsdflags & APPEND) || (cp->c_bsdflags & (IMMUTABLE | APPEND))) { + error = EPERM; + goto out; + } + + /* Remove the entry from the namei cache: */ + cache_purge(vp); + + /* + * Protect against a race with rename by using the component + * name passed in and parent id from dvp (instead of using + * the cp->c_desc which may have changed). + */ + desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = dcp->c_fileid; + desc.cd_cnid = cp->c_cnid; + desc.cd_flags = CD_ISDIR; + desc.cd_encoding = cp->c_encoding; + desc.cd_hint = 0; + + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { + error = 0; + goto out; + } + + /* Remove entry from catalog */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + if (!skip_reserve) { + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + } + + error = cat_delete(hfsmp, &desc, &cp->c_attr); + + if (!error) { + // + // if skip_reserve == 1 then we're being called from hfs_vnop_rename() and thus + // we don't need to touch the document_id as it's handled by the rename code. 
+ // otherwise it's a normal remove and we need to save the document id in the + // per thread struct and clear it from the cnode. + // + struct doc_tombstone *ut; + ut = doc_tombstone_get(); + if (!skip_reserve && (cp->c_bsdflags & UF_TRACKED) + && doc_tombstone_should_save(ut, vp, cnp)) { + + uint32_t doc_id = hfs_get_document_id(cp); + + // this event is more of a "pending-delete" + if (ut->t_lastop_document_id) { + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)0, // dst inode # + FSE_ARG_INT32, doc_id, + FSE_ARG_DONE); + } + + doc_tombstone_save(dvp, vp, cnp, doc_id, cp->c_fileid); + + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + // clear this so it's never returned again + fip->document_id = 0; + cp->c_bsdflags &= ~UF_TRACKED; + } + + /* The parent lost a child */ + if (dcp->c_entries > 0) + dcp->c_entries--; + DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + + dcp->c_touch_chgtime = TRUE; + dcp->c_touch_modtime = TRUE; + dcp->c_flag |= C_MODIFIED; + + hfs_update(dcp->c_vp, 0); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) + goto out; + +#if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_chkiq(cp, -1, NOCRED, 0); +#endif /* QUOTA */ + + hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID)); + + /* Mark C_NOEXISTS since the catalog entry is now gone */ + cp->c_flag |= C_NOEXISTS; + +out: + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + return (error); +} + + +/* + * Remove a file or link. + */ +int +hfs_vnop_remove(struct vnop_remove_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct cnode *dcp = VTOC(dvp); + struct cnode *cp; + struct vnode *rvp = NULL; + int error=0, recycle_rsrc=0; + int recycle_vnode = 0; + uint32_t rsrc_vid = 0; + time_t orig_ctime; + + if (dvp == vp) { + return (EINVAL); + } + + orig_ctime = VTOC(vp)->c_ctime; + if (!vnode_isnamedstream(vp) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) { + error = nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + if (error) { + // XXXdbg - decide on a policy for handling namespace handler failures! + // for now we just let them proceed. + } + } + error = 0; + + cp = VTOC(vp); + +relock: + + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + if (rvp) { + vnode_put (rvp); + } + return (error); + } + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + if ((cp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockpair(dcp, cp); + + if (hfs_generate_document_id(VTOHFS(vp), &newid) == 0) { + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + ((struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16))->document_id = newid; + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, VTOHFS(vp)->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); + } else { + // XXXdbg - couldn't get a new docid... what to do? 
can't really fail the rm... + hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK); + } + } + + /* + * Lazily respond to determining if there is a valid resource fork + * vnode attached to 'cp' if it is a regular file or symlink. + * If the vnode does not exist, then we may proceed without having to + * create it. + * + * If, however, it does exist, then we need to acquire an iocount on the + * vnode after acquiring its vid. This ensures that if we have to do I/O + * against it, it can't get recycled from underneath us in the middle + * of this call. + * + * Note: this function may be invoked for directory hardlinks, so just skip these + * steps if 'vp' is a directory. + */ + + enum vtype vtype = vnode_vtype(vp); + if ((vtype == VLNK) || (vtype == VREG)) { + if ((cp->c_rsrc_vp) && (rvp == NULL)) { + /* We need to acquire the rsrc vnode */ + rvp = cp->c_rsrc_vp; + rsrc_vid = vnode_vid (rvp); + + /* Unlock everything to acquire iocount on the rsrc vnode */ + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + hfs_unlockpair (dcp, cp); + /* Use the vid to maintain identity on rvp */ + if (vnode_getwithvid(rvp, rsrc_vid)) { + /* + * If this fails, then it was recycled or + * reclaimed in the interim. Reset fields and + * start over. + */ + rvp = NULL; + rsrc_vid = 0; + } + goto relock; + } + } + + /* + * Check to see if we raced rmdir for the parent directory + * hfs_removefile already checks for a race on vp/cp + */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = ENOENT; + goto rm_done; + } + + error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, NULL, 0); + + /* + * If the remove succeeded in deleting the file, then we may need to mark + * the resource fork for recycle so that it is reclaimed as quickly + * as possible. If it were not recycled quickly, then this resource fork + * vnode could keep a v_parent reference on the data fork, which prevents it + * from going through reclaim (by giving it extra usecounts), except in the force- + * unmount case. + * + * However, a caveat: we need to continue to supply resource fork + * access to open-unlinked files even if the resource fork is not open. This is + * a requirement for the compressed files work. Luckily, hfs_vgetrsrc will handle + * this already if the data fork has been re-parented to the hidden directory. + * + * As a result, all we really need to do here is mark the resource fork vnode + * for recycle. If it goes out of core, it can be brought in again if needed. + * If the cnode was instead marked C_NOEXISTS, then there wouldn't be any + * more work. + */ + if (error == 0) { + hfs_hotfile_deleted(vp); + + if (rvp) { + recycle_rsrc = 1; + } + /* + * If the target was actually removed from the catalog schedule it for + * full reclamation/inactivation. 
We hold an iocount on it so it should just + * get marked with MARKTERM + */ + if (cp->c_flag & C_NOEXISTS) { + recycle_vnode = 1; + } + } + + + /* + * Drop the truncate lock before unlocking the cnode + * (which can potentially perform a vnode_put and + * recycle the vnode which in turn might require the + * truncate lock) + */ +rm_done: + hfs_unlockpair(dcp, cp); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + + if (recycle_rsrc) { + /* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */ + vnode_recycle(rvp); + } + if (recycle_vnode) { + vnode_recycle (vp); + } + + if (rvp) { + /* drop iocount on rsrc fork, was obtained at beginning of fxn */ + vnode_put(rvp); + } + + return (error); +} + + +int +hfs_removefile_callback(struct buf *bp, void *hfsmp) { + + if ( !(buf_flags(bp) & B_META)) + panic("hfs: symlink bp @ %p is not marked meta-data!\n", bp); + /* + * it's part of the current transaction, kill it. + */ + journal_kill_block(((struct hfsmount *)hfsmp)->jnl, bp); + + return (BUF_CLAIMED); +} + +/* + * hfs_removefile + * + * Similar to hfs_vnop_remove except there are additional options. + * This function may be used to remove directories if they have + * lots of EA's -- note the 'allow_dirs' argument. + * + * This function is able to delete blocks & fork data for the resource + * fork even if it does not exist in core (and have a backing vnode). + * It should infer the correct behavior based on the number of blocks + * in the cnode and whether or not the resource fork pointer exists or + * not. As a result, one only need pass in the 'vp' corresponding to the + * data fork of this file (or main vnode in the case of a directory). + * Passing in a resource fork will result in an error. + * + * Because we do not create any vnodes in this function, we are not at + * risk of deadlocking against ourselves by double-locking. + * + * Requires cnode and truncate locks to be held. + */ +int +hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, + int flags, int skip_reserve, int allow_dirs, + __unused struct vnode *rvp, int only_unlink) +{ + struct cnode *cp; + struct cnode *dcp; + struct vnode *rsrc_vp = NULL; + struct hfsmount *hfsmp; + struct cat_desc desc; + struct timeval tv; + int dataforkbusy = 0; + int rsrcforkbusy = 0; + int lockflags; + int error = 0; + int started_tr = 0; + int isbigfile = 0, defer_remove=0, isdir=0; + int update_vh = 0; + + cp = VTOC(vp); + dcp = VTOC(dvp); + hfsmp = VTOHFS(vp); + + /* Check if we lost a race post lookup. */ + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + return (0); + } + + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) { + return 0; + } + + /* Make sure a remove is permitted */ + if (VNODE_IS_RSRC(vp)) { + return (EPERM); + } + else { + /* + * We know it's a data fork. + * Probe the cnode to see if we have a valid resource fork + * in hand or not. + */ + rsrc_vp = cp->c_rsrc_vp; + } + + /* Don't allow deleting the journal or journal_info_block. */ + if (hfs_is_journal_file(hfsmp, cp)) { + return (EPERM); + } + + /* + * Hard links require special handling. + */ + if (cp->c_flag & C_HARDLINK) { + if ((flags & VNODE_REMOVE_NODELETEBUSY) && vnode_isinuse(vp, 0)) { + return (EBUSY); + } else { + /* A directory hard link with a link count of one is + * treated as a regular directory. Therefore it should + * only be removed using rmdir(). 
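+			 * (That is what the EPERM just below enforces when allow_dirs
+			 * is not set.)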
+ */ + if ((vnode_isdir(vp) == 1) && (cp->c_linkcount == 1) && + (allow_dirs == 0)) { + return (EPERM); + } + return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); + } + } + + /* Directories should call hfs_rmdir! (unless they have a lot of attributes) */ + if (vnode_isdir(vp)) { + if (allow_dirs == 0) + return (EPERM); /* POSIX */ + isdir = 1; + } + /* Sanity check the parent ids. */ + if ((cp->c_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (cp->c_parentcnid != dcp->c_fileid)) { + return (EINVAL); + } + + dcp->c_flag |= C_DIR_MODIFICATION; + + // this guy is going away so mark him as such + cp->c_flag |= C_DELETED; + + + /* Remove our entry from the namei cache. */ + cache_purge(vp); + + /* + * If the caller was operating on a file (as opposed to a + * directory with EAs), then we need to figure out + * whether or not it has a valid resource fork vnode. + * + * If there was a valid resource fork vnode, then we need + * to use hfs_truncate to eliminate its data. If there is + * no vnode, then we hold the cnode lock which would + * prevent it from being created. As a result, + * we can use the data deletion functions which do not + * require that a cnode/vnode pair exist. + */ + + /* Check if this file is being used. */ + if (isdir == 0) { + dataforkbusy = vnode_isinuse(vp, 0); + /* + * At this point, we know that 'vp' points to the + * a data fork because we checked it up front. And if + * there is no rsrc fork, rsrc_vp will be NULL. + */ + if (rsrc_vp && (cp->c_blocks - VTOF(vp)->ff_blocks)) { + rsrcforkbusy = vnode_isinuse(rsrc_vp, 0); + } + } + + /* Check if we have to break the deletion into multiple pieces. */ + if (isdir == 0) + isbigfile = cp->c_datafork->ff_size >= HFS_BIGFILE_SIZE; + + /* Check if the file has xattrs. If it does we'll have to delete them in + individual transactions in case there are too many */ + if ((hfsmp->hfs_attribute_vp != NULL) && + (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) { + defer_remove = 1; + } + + /* If we are explicitly told to only unlink item and move to hidden dir, then do it */ + if (only_unlink) { + defer_remove = 1; + } + + /* + * Carbon semantics prohibit deleting busy files. + * (enforced when VNODE_REMOVE_NODELETEBUSY is requested) + */ + if (dataforkbusy || rsrcforkbusy) { + if ((flags & VNODE_REMOVE_NODELETEBUSY) || + (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid == 0)) { + error = EBUSY; + goto out; + } + } + +#if QUOTA + if (hfsmp->hfs_flags & HFS_QUOTAS) + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + + /* + * Do a ubc_setsize to indicate we need to wipe contents if: + * 1) item is a regular file. + * 2) Neither fork is busy AND we are not told to unlink this. + * + * We need to check for the defer_remove since it can be set without + * having a busy data or rsrc fork + */ + if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy) && (defer_remove == 0)) { + /* + * A ubc_setsize can cause a pagein so defer it + * until after the cnode lock is dropped. The + * cnode lock cannot be dropped/reacquired here + * since we might already hold the journal lock. 
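+		 * (The C_NEED_*_SETSIZE flags set here record that the ubc_setsize is
+		 * still owed; it is performed later, once the cnode lock is dropped.)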
+ */ + if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) { + cp->c_flag |= C_NEED_DATA_SETSIZE; + } + if (!rsrcforkbusy && rsrc_vp) { + cp->c_flag |= C_NEED_RSRC_SETSIZE; + } + } + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; + } + started_tr = 1; + + // XXXdbg - if we're journaled, kill any dirty symlink buffers + if (hfsmp->jnl && vnode_islnk(vp) && (defer_remove == 0)) { + buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); + } + + /* + * Prepare to truncate any non-busy forks. Busy forks will + * get truncated when their vnode goes inactive. + * Note that we will only enter this region if we + * can avoid creating an open-unlinked file. If + * either region is busy, we will have to create an open + * unlinked file. + * + * Since we are deleting the file, we need to stagger the runtime + * modifications to do things in such a way that a crash won't + * result in us getting overlapped extents or any other + * bad inconsistencies. As such, we call prepare_release_storage + * which updates the UBC, updates quota information, and releases + * any loaned blocks that belong to this file. No actual + * truncation or bitmap manipulation is done until *AFTER* + * the catalog record is removed. + */ + if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy) && (only_unlink == 0)) { + + if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) { + + error = hfs_prepare_release_storage (hfsmp, vp); + if (error) { + goto out; + } + update_vh = 1; + } + + /* + * If the resource fork vnode does not exist, we can skip this step. + */ + if (!rsrcforkbusy && rsrc_vp) { + error = hfs_prepare_release_storage (hfsmp, rsrc_vp); + if (error) { + goto out; + } + update_vh = 1; + } + } + + /* + * Protect against a race with rename by using the component + * name passed in and parent id from dvp (instead of using + * the cp->c_desc which may have changed). Also, be aware that + * because we allow directories to be passed in, we need to special case + * this temporary descriptor in case we were handed a directory. + */ + if (isdir) { + desc.cd_flags = CD_ISDIR; + } + else { + desc.cd_flags = 0; + } + desc.cd_encoding = cp->c_desc.cd_encoding; + desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = dcp->c_fileid; + desc.cd_hint = cp->c_desc.cd_hint; + desc.cd_cnid = cp->c_cnid; + microtime(&tv); + + /* + * There are two cases to consider: + * 1. File/Dir is busy/big/defer_remove ==> move/rename the file/dir + * 2. File is not in use ==> remove the file + * + * We can get a directory in case 1 because it may have had lots of attributes, + * which need to get removed here. + */ + if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) { + char delname[32]; + struct cat_desc to_desc; + struct cat_desc todir_desc; + + /* + * Orphan this file or directory (move to hidden directory). + * Again, we need to take care that we treat directories as directories, + * and files as files. Because directories with attributes can be passed in + * check to make sure that we have a directory or a file before filling in the + * temporary descriptor's flags. We keep orphaned directories AND files in + * the FILE_HARDLINKS private directory since we're generalizing over all + * orphaned filesystem objects. 
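+		 * (Concretely: cat_rename() below moves the entry to a
+		 * MAKE_DELETED_NAME temporary name under the FILE_HARDLINKS private
+		 * directory, and the entry counts on that directory and on dcp are
+		 * then adjusted.)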
+ */ + bzero(&todir_desc, sizeof(todir_desc)); + todir_desc.cd_parentcnid = 2; + + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); + bzero(&to_desc, sizeof(to_desc)); + to_desc.cd_nameptr = (const u_int8_t *)delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + if (isdir) { + to_desc.cd_flags = CD_ISDIR; + } + else { + to_desc.cd_flags = 0; + } + to_desc.cd_cnid = cp->c_cnid; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + if (!skip_reserve) { + if ((error = cat_preflight(hfsmp, CAT_RENAME, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + } + + error = cat_rename(hfsmp, &desc, &todir_desc, + &to_desc, (struct cat_desc *)NULL); + + if (error == 0) { + hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries++; + if (isdir == 1) { + INC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]); + } + (void) cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS], + &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL); + + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + if (isdir == 1) { + DEC_FOLDERCOUNT(hfsmp, dcp->c_attr); + } + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + + /* Update the file or directory's state */ + cp->c_flag |= C_DELETED; + cp->c_ctime = tv.tv_sec; + --cp->c_linkcount; + (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL); + } + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) + goto out; + + } + else { + /* + * Nobody is using this item; we can safely remove everything. + */ + struct filefork *temp_rsrc_fork = NULL; +#if QUOTA + off_t savedbytes; + int blksize = hfsmp->blockSize; +#endif + u_int32_t fileid = cp->c_fileid; + + /* + * Figure out if we need to read the resource fork data into + * core before wiping out the catalog record. + * + * 1) Must not be a directory + * 2) cnode's c_rsrcfork ptr must be NULL. + * 3) rsrc fork must have actual blocks + */ + if ((isdir == 0) && (cp->c_rsrcfork == NULL) && + (cp->c_blocks - VTOF(vp)->ff_blocks)) { + /* + * The resource fork vnode & filefork did not exist. + * Create a temporary one for use in this function only. 
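+			 * (It is allocated from HFS_FILEFORK_ZONE, filled in via
+			 * cat_lookup, passed to hfs_release_storage(), and freed again
+			 * before this function returns.)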
+ */ + temp_rsrc_fork = hfs_zalloc(HFS_FILEFORK_ZONE); + bzero(temp_rsrc_fork, sizeof(struct filefork)); + temp_rsrc_fork->ff_cp = cp; + rl_init(&temp_rsrc_fork->ff_invalidranges); + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* Look up the resource fork first, if necessary */ + if (temp_rsrc_fork) { + error = cat_lookup (hfsmp, &desc, 1, 0, (struct cat_desc*) NULL, + (struct cat_attr*) NULL, &temp_rsrc_fork->ff_data, NULL); + if (error) { + hfs_zfree(temp_rsrc_fork, HFS_FILEFORK_ZONE); + hfs_systemfile_unlock (hfsmp, lockflags); + goto out; + } + } + + if (!skip_reserve) { + if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) { + if (temp_rsrc_fork) { + hfs_zfree(temp_rsrc_fork, HFS_FILEFORK_ZONE); + } + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + } + + error = cat_delete(hfsmp, &desc, &cp->c_attr); + + if (error && error != ENXIO && error != ENOENT) { + printf("hfs_removefile: deleting file %s (id=%d) vol=%s err=%d\n", + cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, hfsmp->vcbVN, error); + } + + if (error == 0) { + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + if (temp_rsrc_fork) { + hfs_zfree(temp_rsrc_fork, HFS_FILEFORK_ZONE); + } + goto out; + } + + /* + * Now that we've wiped out the catalog record, the file effectively doesn't + * exist anymore. So update the quota records to reflect the loss of the + * data fork and the resource fork. + */ +#if QUOTA + if (cp->c_datafork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_datafork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + + /* + * We may have just deleted the catalog record for a resource fork even + * though it did not exist in core as a vnode. However, just because there + * was a resource fork pointer in the cnode does not mean that it had any blocks. + */ + if (temp_rsrc_fork || cp->c_rsrcfork) { + if (cp->c_rsrcfork) { + if (cp->c_rsrcfork->ff_blocks > 0) { + savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + else { + /* we must have used a temporary fork */ + savedbytes = ((off_t)temp_rsrc_fork->ff_blocks * (off_t)blksize); + (void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0); + } + } + + if (hfsmp->hfs_flags & HFS_QUOTAS) { + (void)hfs_chkiq(cp, -1, NOCRED, 0); + } +#endif + + if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) { + hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size); + cp->c_datafork->ff_symlinkptr = NULL; + } + + /* + * If we didn't get any errors deleting the catalog entry, then go ahead + * and release the backing store now. The filefork pointers are still valid. + */ + if (temp_rsrc_fork) { + error = hfs_release_storage (hfsmp, cp->c_datafork, temp_rsrc_fork, fileid); + } + else { + /* if cp->c_rsrcfork == NULL, hfs_release_storage will skip over it. */ + error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid); + } + if (error) { + /* + * If we encountered an error updating the extents and bitmap, + * mark the volume inconsistent. At this point, the catalog record has + * already been deleted, so we can't recover it at this point. 
We need + * to proceed and update the volume header and mark the cnode C_NOEXISTS. + * The subsequent fsck should be able to recover the free space for us. + */ + hfs_mark_inconsistent(hfsmp, HFS_OP_INCOMPLETE); + } + else { + /* reset update_vh to 0, since hfs_release_storage should have done it for us */ + update_vh = 0; + } + + /* Get rid of the temporary rsrc fork */ + if (temp_rsrc_fork) { + hfs_zfree(temp_rsrc_fork, HFS_FILEFORK_ZONE); + } + + cp->c_flag |= C_NOEXISTS; + cp->c_flag &= ~C_DELETED; + + cp->c_touch_chgtime = TRUE; + --cp->c_linkcount; + + /* + * We must never get a directory if we're in this else block. We could + * accidentally drop the number of files in the volume header if we did. + */ + hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); + + } + + // + // if skip_reserve == 1 then we're being called from hfs_vnop_rename() and thus + // we don't need to touch the document_id as it's handled by the rename code. + // otherwise it's a normal remove and we need to save the document id in the + // per thread struct and clear it from the cnode. + // + if (!error && !skip_reserve && (cp->c_bsdflags & UF_TRACKED) + && cp->c_linkcount <= 1) { + struct doc_tombstone *ut; + ut = doc_tombstone_get(); + if (doc_tombstone_should_save(ut, vp, cnp)) { + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)0, // dst inode # + FSE_ARG_INT32, hfs_get_document_id(cp), // document id + FSE_ARG_DONE); + + doc_tombstone_save(dvp, vp, cnp, hfs_get_document_id(cp), + cp->c_fileid); + + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + fip->document_id = 0; + cp->c_bsdflags &= ~UF_TRACKED; + } + } + + /* + * All done with this cnode's descriptor... + * + * Note: all future catalog calls for this cnode must be by + * fileid only. This is OK for HFS (which doesn't have file + * thread records) since HFS doesn't support the removal of + * busy files. + */ + cat_releasedesc(&cp->c_desc); + +out: + if (error) { + cp->c_flag &= ~C_DELETED; + } + + if (update_vh) { + /* + * If we bailed out earlier, we may need to update the volume header + * to deal with the borrowed blocks accounting. + */ + hfs_volupdate (hfsmp, VOL_UPDATE, 0); + } + + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + + return (error); +} + + +void +replace_desc(struct cnode *cp, struct cat_desc *cdp) +{ + // fixes 4348457 and 4463138 + if (&cp->c_desc == cdp) { + return; + } + + /* First release allocated name buffer */ + if (cp->c_desc.cd_flags & CD_HASBUF && cp->c_desc.cd_nameptr != 0) { + const u_int8_t *name = cp->c_desc.cd_nameptr; + + cp->c_desc.cd_nameptr = 0; + cp->c_desc.cd_namelen = 0; + cp->c_desc.cd_flags &= ~CD_HASBUF; + vfs_removename((const char *)name); + } + bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc)); + + /* Cnode now owns the name buffer */ + cdp->cd_nameptr = 0; + cdp->cd_namelen = 0; + cdp->cd_flags &= ~CD_HASBUF; +} + +/* + * hfs_vnop_rename + * + * Just forwards the arguments from VNOP_RENAME into those of + * VNOP_RENAMEX but zeros out the flags word. + */ +int hfs_vnop_rename (struct vnop_rename_args *args) { + struct vnop_renamex_args vrx; + + vrx.a_desc = args->a_desc; // we aren't using it to switch into the vnop array, so fine as is. 
+ vrx.a_fdvp = args->a_fdvp; + vrx.a_fvp = args->a_fvp; + vrx.a_fcnp = args->a_fcnp; + vrx.a_tdvp = args->a_tdvp; + vrx.a_tvp = args->a_tvp; + vrx.a_tcnp = args->a_tcnp; + vrx.a_vap = NULL; // not used + vrx.a_flags = 0; //zero out the flags. + vrx.a_context = args->a_context; + + return hfs_vnop_renamex (&vrx); +} + + + +/* + * Rename a cnode. + * + * The VFS layer guarantees that: + * - source and destination will either both be directories, or + * both not be directories. + * - all the vnodes are from the same file system + * + * When the target is a directory, HFS must ensure that its empty. + * + * Note that this function requires up to 6 vnodes in order to work properly + * if it is operating on files (and not on directories). This is because only + * files can have resource forks, and we now require iocounts to be held on the + * vnodes corresponding to the resource forks (if applicable) as well as + * the files or directories undergoing rename. The problem with not holding + * iocounts on the resource fork vnodes is that it can lead to a deadlock + * situation: The rsrc fork of the source file may be recycled and reclaimed + * in order to provide a vnode for the destination file's rsrc fork. Since + * data and rsrc forks share the same cnode, we'd eventually try to lock the + * source file's cnode in order to sync its rsrc fork to disk, but it's already + * been locked. By taking the rsrc fork vnodes up front we ensure that they + * cannot be recycled, and that the situation mentioned above cannot happen. + */ +int +hfs_vnop_renamex(struct vnop_renamex_args *ap) +{ + struct vnode *tvp = ap->a_tvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + /* + * Note that we only need locals for the target/destination's + * resource fork vnode (and only if necessary). We don't care if the + * source has a resource fork vnode or not. + */ + struct vnode *tvp_rsrc = NULLVP; + uint32_t tvp_rsrc_vid = 0; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct proc *p = vfs_context_proc(ap->a_context); + struct cnode *fcp; + struct cnode *fdcp; + struct cnode *tdcp; + struct cnode *tcp; + struct cnode *error_cnode; + struct cat_desc from_desc; + struct cat_desc to_desc; + struct cat_desc out_desc; + struct hfsmount *hfsmp; + cat_cookie_t cookie; + int tvp_deleted = 0; + int started_tr = 0, got_cookie = 0; + int took_trunc_lock = 0; + int lockflags; + int error; + time_t orig_from_ctime, orig_to_ctime; + int emit_rename = 1; + int emit_delete = 1; + int is_tracked = 0; + int unlocked; + vnode_t old_doc_vp = NULL; + int rename_exclusive = 0; + + orig_from_ctime = VTOC(fvp)->c_ctime; + if (tvp && VTOC(tvp)) { + orig_to_ctime = VTOC(tvp)->c_ctime; + } else { + orig_to_ctime = ~0; + } + + hfsmp = VTOHFS(tdvp); + + /* Check the flags first, so we can avoid grabbing locks if necessary */ + if (ap->a_flags) { + /* These are the only flags we support for now */ + if ((ap->a_flags & (VFS_RENAME_EXCL)) == 0) { + return ENOTSUP; + } + + /* The rename flags are mutually exclusive for HFS+ */ + switch (ap->a_flags & VFS_RENAME_FLAGS_MASK) { + case VFS_RENAME_EXCL: + rename_exclusive = true; + break; + default: + return ENOTSUP; + } + } + + /* + * Do special case checks here. If fvp == tvp then we need to check the + * cnode with locks held. + */ + if (fvp == tvp) { + int is_hardlink = 0; + /* + * In this case, we do *NOT* ever emit a DELETE event. 
+ * We may not necessarily emit a RENAME event + */ + emit_delete = 0; + if ((error = hfs_lock(VTOC(fvp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return error; + } + /* Check to see if the item is a hardlink or not */ + is_hardlink = (VTOC(fvp)->c_flag & C_HARDLINK); + hfs_unlock (VTOC(fvp)); + + /* + * If the item is not a hardlink, then case sensitivity must be off, otherwise + * two names should not resolve to the same cnode unless they were case variants. + */ + if (is_hardlink) { + emit_rename = 0; + /* + * Hardlinks are a little trickier. We only want to emit a rename event + * if the item is a hardlink, the parent directories are the same, case sensitivity + * is off, and the case folded names are the same. See the fvp == tvp case below for more + * info. + */ + + if ((fdvp == tdvp) && ((hfsmp->hfs_flags & HFS_CASE_SENSITIVE) == 0)) { + if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, + (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* Then in this case only it is ok to emit a rename */ + emit_rename = 1; + } + } + } + } + if (emit_rename) { + /* c_bsdflags should only be assessed while holding the cnode lock. + * This is not done consistently throughout the code and can result + * in race. This will be fixed via rdar://12181064 + */ + if (VTOC(fvp)->c_bsdflags & UF_TRACKED) { + is_tracked = 1; + } + nspace_snapshot_event(fvp, orig_from_ctime, NAMESPACE_HANDLER_RENAME_OP, NULL); + } + + if (tvp && VTOC(tvp)) { + if (emit_delete) { + nspace_snapshot_event(tvp, orig_to_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); + } + } + +retry: + /* When tvp exists, take the truncate lock for hfs_removefile(). */ + if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { + hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + took_trunc_lock = 1; + } + +relock: + error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL, + HFS_EXCLUSIVE_LOCK, &error_cnode); + if (error) { + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); + took_trunc_lock = 0; + } + + /* + * We hit an error path. If we were trying to re-acquire the locks + * after coming through here once, we might have already obtained + * an iocount on tvp's resource fork vnode. Drop that before dealing + * with the failure. Note this is safe -- since we are in an + * error handling path, we can't be holding the cnode locks. + */ + if (tvp_rsrc) { + vnode_put (tvp_rsrc); + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + + /* + * tvp might no longer exist. If the cause of the lock failure + * was tvp, then we can try again with tvp/tcp set to NULL. + * This is ok because the vfs syscall will vnode_put the vnodes + * after we return from hfs_vnop_rename. + */ + if ((error == ENOENT) && (tvp != NULL) && (error_cnode == VTOC(tvp))) { + tcp = NULL; + tvp = NULL; + goto retry; + } + + /* If we want to reintroduce notifications for failed renames, this + is the place to do it. */ + + return (error); + } + + fdcp = VTOC(fdvp); + fcp = VTOC(fvp); + tdcp = VTOC(tdvp); + tcp = tvp ? VTOC(tvp) : NULL; + + + /* + * If caller requested an exclusive rename (VFS_RENAME_EXCL) and 'tcp' exists + * then we must fail the operation. 
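+	 * (The failure surfaces as the EEXIST return just below.)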
+ */ + if (tcp && rename_exclusive) { + error = EEXIST; + goto out; + } + + // + // if the item is tracked but doesn't have a document_id, assign one and generate an fsevent for it + // + unlocked = 0; + if ((fcp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + hfs_unlockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL); + unlocked = 1; + + if (hfs_generate_document_id(hfsmp, &newid) == 0) { + hfs_lock(fcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + ((struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16))->document_id = newid; + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)fcp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); + hfs_unlock(fcp); + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rename... + } + + // + // check if we're going to need to fix tcp as well. if we aren't, go back relock + // everything. otherwise continue on and fix up tcp as well before relocking. + // + if (tcp == NULL || !(tcp->c_bsdflags & UF_TRACKED) || ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id != 0) { + goto relock; + } + } + + // + // same thing for tcp if it's set + // + if (tcp && (tcp->c_bsdflags & UF_TRACKED) && ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id == 0) { + uint32_t newid; + + if (!unlocked) { + hfs_unlockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL); + unlocked = 1; + } + + if (hfs_generate_document_id(hfsmp, &newid) == 0) { + hfs_lock(tcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + ((struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16))->document_id = newid; + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)0, // src inode # + FSE_ARG_INO, (ino64_t)tcp->c_fileid, // dst inode # + FSE_ARG_INT32, newid, + FSE_ARG_DONE); + hfs_unlock(tcp); + } else { + // XXXdbg - couldn't get a new docid... what to do? can't really fail the rename... + } + + // go back up and relock everything. next time through the if statement won't be true + // and we'll skip over this block of code. + goto relock; + } + + + + /* + * Acquire iocounts on the destination's resource fork vnode + * if necessary. If dst/src are files and the dst has a resource + * fork vnode, then we need to try and acquire an iocount on the rsrc vnode. + * If it does not exist, then we don't care and can skip it. + */ + if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) { + if ((tvp) && (tcp->c_rsrc_vp) && (tvp_rsrc == NULL)) { + tvp_rsrc = tcp->c_rsrc_vp; + /* + * We can look at the vid here because we're holding the + * cnode lock on the underlying cnode for this rsrc vnode. + */ + tvp_rsrc_vid = vnode_vid (tvp_rsrc); + + /* Unlock everything to acquire iocount on this rsrc vnode */ + if (took_trunc_lock) { + hfs_unlock_truncate (VTOC(tvp), HFS_LOCK_DEFAULT); + took_trunc_lock = 0; + } + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + if (vnode_getwithvid (tvp_rsrc, tvp_rsrc_vid)) { + /* iocount acquisition failed. Reset fields and start over.. */ + tvp_rsrc_vid = 0; + tvp_rsrc = NULL; + } + goto retry; + } + } + + + + /* Ensure we didn't race src or dst parent directories with rmdir. 
*/ + if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + + /* Check for a race against unlink. The hfs_valid_cnode checks validate + * the parent/child relationship with fdcp and tdcp, as well as the + * component name of the target cnodes. + */ + if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid, NULL, &error)) { + error = ENOENT; + goto out; + } + + if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid, NULL, &error))) { + // + // hmm, the destination vnode isn't valid any more. + // in this case we can just drop him and pretend he + // never existed in the first place. + // + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); + took_trunc_lock = 0; + } + error = 0; + + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + tcp = NULL; + tvp = NULL; + + // retry the locking with tvp null'ed out + goto retry; + } + + fdcp->c_flag |= C_DIR_MODIFICATION; + if (fdvp != tdvp) { + tdcp->c_flag |= C_DIR_MODIFICATION; + } + + /* + * Disallow renaming of a directory hard link if the source and + * destination parent directories are different, or a directory whose + * descendant is a directory hard link and the one of the ancestors + * of the destination directory is a directory hard link. + */ + if (vnode_isdir(fvp) && (fdvp != tdvp)) { + if (fcp->c_flag & C_HARDLINK) { + error = EPERM; + goto out; + } + if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) { + error = EPERM; + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + hfs_systemfile_unlock(hfsmp, lockflags); + } + } + + /* + * The following edge case is caught here: + * (to cannot be a descendent of from) + * + * o fdvp + * / + * / + * o fvp + * \ + * \ + * o tdvp + * / + * / + * o tvp + */ + if (tdcp->c_parentcnid == fcp->c_fileid) { + error = EINVAL; + goto out; + } + + /* + * The following two edge cases are caught here: + * (note tvp is not empty) + * + * o tdvp o tdvp + * / / + * / / + * o tvp tvp o fdvp + * \ \ + * \ \ + * o fdvp o fvp + * / + * / + * o fvp + */ + if (tvp && vnode_isdir(tvp) && (tcp->c_entries != 0) && fvp != tvp) { + error = ENOTEMPTY; + goto out; + } + + /* + * The following edge case is caught here: + * (the from child and parent are the same) + * + * o tdvp + * / + * / + * fdvp o fvp + */ + if (fdvp == fvp) { + error = EINVAL; + goto out; + } + + /* + * Make sure "from" vnode and its parent are changeable. + */ + if ((fcp->c_bsdflags & (IMMUTABLE | APPEND)) || (fdcp->c_bsdflags & APPEND)) { + error = EPERM; + goto out; + } + + /* + * If the destination parent directory is "sticky", then the + * user must own the parent directory, or the destination of + * the rename, otherwise the destination may not be changed + * (except by root). This implements append-only directories. + * + * Note that checks for immutable and write access are done + * by the call to hfs_removefile. 
+ */ + if (tvp && (tdcp->c_mode & S_ISTXT) && + (suser(vfs_context_ucred(ap->a_context), NULL)) && + (kauth_cred_getuid(vfs_context_ucred(ap->a_context)) != tdcp->c_uid) && + (hfs_owner_rights(hfsmp, tcp->c_uid, vfs_context_ucred(ap->a_context), p, false)) ) { + error = EPERM; + goto out; + } + + /* Don't allow modification of the journal or journal_info_block */ + if (hfs_is_journal_file(hfsmp, fcp) || + (tcp && hfs_is_journal_file(hfsmp, tcp))) { + error = EPERM; + goto out; + } + +#if QUOTA + if (tvp) + (void)hfs_getinoquota(tcp); +#endif + /* Preflighting done, take fvp out of the name space. */ + cache_purge(fvp); + +#if CONFIG_SECLUDED_RENAME + /* + * Check for "secure" rename that imposes additional restrictions on the + * source vnode. We wait until here to check in order to prevent a race + * with other threads that manage to look up fvp, but their open or link + * is blocked by our locks. At this point, with fvp out of the name cache, + * and holding the lock on fdvp, no other thread can find fvp. + * + * TODO: Do we need to limit these checks to regular files only? + */ + if (fcnp->cn_flags & CN_SECLUDE_RENAME) { + if (vnode_isdir(fvp)) { + error = EISDIR; + goto out; + } + + /* + * Neither fork of source may be open or memory mapped. + * We also don't want it in use by any other system call. + * The file must not have hard links. + * + * We can't simply use vnode_isinuse() because that does not + * count opens with O_EVTONLY. We don't want a malicious + * process using O_EVTONLY to subvert a secluded rename. + */ + if (fcp->c_linkcount != 1) { + error = EMLINK; + goto out; + } + + if (fcp->c_rsrc_vp && (vnode_usecount(fcp->c_rsrc_vp) > 0 || + vnode_iocount(fcp->c_rsrc_vp) > 0)) { + /* Resource fork is in use (including O_EVTONLY) */ + error = EBUSY; + goto out; + } + if (fcp->c_vp && (vnode_usecount(fcp->c_vp) > (fcp->c_rsrc_vp ? 1 : 0) || + vnode_iocount(fcp->c_vp) > 1)) { + /* + * Data fork is in use, including O_EVTONLY, but not + * including a reference from the resource fork. + */ + error = EBUSY; + goto out; + } + } +#endif + + bzero(&from_desc, sizeof(from_desc)); + from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; + from_desc.cd_namelen = fcnp->cn_namelen; + from_desc.cd_parentcnid = fdcp->c_fileid; + from_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED); + from_desc.cd_cnid = fcp->c_cnid; + + bzero(&to_desc, sizeof(to_desc)); + to_desc.cd_nameptr = (const u_int8_t *)tcnp->cn_nameptr; + to_desc.cd_namelen = tcnp->cn_namelen; + to_desc.cd_parentcnid = tdcp->c_fileid; + to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED); + to_desc.cd_cnid = fcp->c_cnid; + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; + } + started_tr = 1; + + /* hfs_vnop_link() and hfs_vnop_rename() set kHFSHasChildLinkMask + * inside a journal transaction and without holding a cnode lock. + * As setting of this bit depends on being in journal transaction for + * concurrency, check this bit again after we start journal transaction for rename + * to ensure that this directory does not have any descendant that + * is a directory hard link. 
+ */ + if (vnode_isdir(fvp) && (fdvp != tdvp)) { + if (fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + if (cat_check_link_ancestry(hfsmp, tdcp->c_fileid, 0)) { + error = EPERM; + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + hfs_systemfile_unlock(hfsmp, lockflags); + } + } + + // if it's a hardlink then re-lookup the name so + // that we get the correct cnid in from_desc (see + // the comment in hfs_removefile for more details) + // + if (fcp->c_flag & C_HARDLINK) { + struct cat_desc tmpdesc; + cnid_t real_cnid; + + tmpdesc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; + tmpdesc.cd_namelen = fcnp->cn_namelen; + tmpdesc.cd_parentcnid = fdcp->c_fileid; + tmpdesc.cd_hint = fdcp->c_childhint; + tmpdesc.cd_flags = fcp->c_desc.cd_flags & CD_ISDIR; + tmpdesc.cd_encoding = 0; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + if (cat_lookup(hfsmp, &tmpdesc, 0, 0, NULL, NULL, NULL, &real_cnid) != 0) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + + // use the real cnid instead of whatever happened to be there + from_desc.cd_cnid = real_cnid; + hfs_systemfile_unlock(hfsmp, lockflags); + } + + /* + * Reserve some space in the Catalog file. + */ + if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) { + goto out; + } + got_cookie = 1; + + /* + * If the destination exists then it may need to be removed. + * + * Due to HFS's locking system, we should always move the + * existing 'tvp' element to the hidden directory in hfs_vnop_rename. + * Because the VNOP_LOOKUP call enters and exits the filesystem independently + * of the actual vnop that it was trying to do (stat, link, readlink), + * we must release the cnode lock of that element during the interim to + * do MAC checking, vnode authorization, and other calls. In that time, + * the item can be deleted (or renamed over). However, only in the rename + * case is it inappropriate to return ENOENT from any of those calls. Either + * the call should return information about the old element (stale), or get + * information about the newer element that we are about to write in its place. + * + * HFS lookup has been modified to detect a rename and re-drive its + * lookup internally. For other calls that have already succeeded in + * their lookup call and are waiting to acquire the cnode lock in order + * to proceed, that cnode lock will not fail due to the cnode being marked + * C_NOEXISTS, because it won't have been marked as such. It will only + * have C_DELETED. Thus, they will simply act on the stale open-unlinked + * element. All future callers will get the new element. + * + * To implement this behavior, we pass the "only_unlink" argument to + * hfs_removefile and hfs_removedir. This will result in the vnode acting + * as though it is open-unlinked. Additionally, when we are done moving the + * element to the hidden directory, we vnode_recycle the target so that it is + * reclaimed as soon as possible. Reclaim and inactive are both + * capable of clearing out unused blocks for an open-unlinked file or dir. 
+ */ + if (tvp) { + // + // if the destination has a document id, we need to preserve it + // + if (fvp != tvp) { + uint32_t document_id; + struct FndrExtendedDirInfo *ffip = (struct FndrExtendedDirInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16); + struct FndrExtendedDirInfo *tfip = (struct FndrExtendedDirInfo *)((char *)&tcp->c_attr.ca_finderinfo + 16); + + if (ffip->document_id && tfip->document_id) { + // both documents are tracked. only save a tombstone from tcp and do nothing else. + doc_tombstone_save(tdvp, tvp, tcnp, hfs_get_document_id(tcp), + tcp->c_fileid); + } else { + struct doc_tombstone *ut; + ut = doc_tombstone_get(); + + document_id = tfip->document_id; + tfip->document_id = 0; + + if (document_id != 0) { + // clear UF_TRACKED as well since tcp is now no longer tracked + tcp->c_bsdflags &= ~UF_TRACKED; + (void) cat_update(hfsmp, &tcp->c_desc, &tcp->c_attr, NULL, NULL); + } + + if (ffip->document_id == 0 && document_id != 0) { + // printf("RENAME: preserving doc-id %d onto %s (from ino %d, to ino %d)\n", document_id, tcp->c_desc.cd_nameptr, tcp->c_desc.cd_cnid, fcp->c_desc.cd_cnid); + fcp->c_bsdflags |= UF_TRACKED; + ffip->document_id = document_id; + + (void) cat_update(hfsmp, &fcp->c_desc, &fcp->c_attr, NULL, NULL); + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)tcp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)fcp->c_fileid, // dst inode # + FSE_ARG_INT32, (uint32_t)ffip->document_id, + FSE_ARG_DONE); + } + else if ((fcp->c_bsdflags & UF_TRACKED) && doc_tombstone_should_save(ut, fvp, fcnp)) { + + if (ut->t_lastop_document_id) { + doc_tombstone_clear(ut, NULL); + } + doc_tombstone_save(fdvp, fvp, fcnp, + hfs_get_document_id(fcp), fcp->c_fileid); + + //printf("RENAME: (dest-exists): saving tombstone doc-id %lld @ %s (ino %d)\n", + // ut->t_lastop_document_id, ut->t_lastop_filename, fcp->c_desc.cd_cnid); + } + } + } + + /* + * When fvp matches tvp they could be case variants + * or matching hard links. + */ + if (fvp == tvp) { + if (!(fcp->c_flag & C_HARDLINK)) { + /* + * If they're not hardlinks, then fvp == tvp must mean we + * are using case-insensitive HFS because case-sensitive would + * not use the same vnode for both. In this case we just update + * the catalog for: a -> A + */ + goto skip_rm; /* simple case variant */ + + } + /* For all cases below, we must be using hardlinks */ + else if ((fdvp != tdvp) || + (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* + * If the parent directories are not the same, AND the two items + * are hardlinks, posix says to do nothing: + * dir1/fred <-> dir2/bob and the op was mv dir1/fred -> dir2/bob + * We just return 0 in this case. + * + * If case sensitivity is on, and we are using hardlinks + * then renaming is supposed to do nothing. + * dir1/fred <-> dir2/FRED, and op == mv dir1/fred -> dir2/FRED + */ + goto out; /* matching hardlinks, nothing to do */ + + } else if (hfs_namecmp((const u_int8_t *)fcnp->cn_nameptr, fcnp->cn_namelen, + (const u_int8_t *)tcnp->cn_nameptr, tcnp->cn_namelen) == 0) { + /* + * If we get here, then the following must be true: + * a) We are running case-insensitive HFS+. + * b) Both paths 'fvp' and 'tvp' are in the same parent directory. + * c) the two names are case-variants of each other. + * + * In this case, we are really only dealing with a single catalog record + * whose name is being updated. 
+ * + * op is dir1/fred -> dir1/FRED + * + * We need to special case the name matching, because if + * dir1/fred <-> dir1/bob were the two links, and the + * op was dir1/fred -> dir1/bob + * That would fail/do nothing. + */ + goto skip_rm; /* case-variant hardlink in the same dir */ + } else { + goto out; /* matching hardlink, nothing to do */ + } + } + + + if (vnode_isdir(tvp)) { + /* + * hfs_removedir will eventually call hfs_removefile on the directory + * we're working on, because only hfs_removefile does the renaming of the + * item to the hidden directory. The directory will stay around in the + * hidden directory with C_DELETED until it gets an inactive or a reclaim. + * That way, we can destroy all of the EAs as needed and allow new ones to be + * written. + */ + error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1); + } + else { + error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, NULL, 1); + + /* + * If the destination file had a resource fork vnode, then we need to get rid of + * its blocks when there are no more references to it. Because the call to + * hfs_removefile above always open-unlinks things, we need to force an inactive/reclaim + * on the resource fork vnode, in order to prevent block leaks. Otherwise, + * the resource fork vnode could prevent the data fork vnode from going out of scope + * because it holds a v_parent reference on it. So we mark it for termination + * with a call to vnode_recycle. hfs_vnop_reclaim has been modified so that it + * can clean up the blocks of open-unlinked files and resource forks. + * + * We can safely call vnode_recycle on the resource fork because we took an iocount + * reference on it at the beginning of the function. + */ + + if ((error == 0) && (tcp->c_flag & C_DELETED) && (tvp_rsrc)) { + vnode_recycle(tvp_rsrc); + } + } + + if (error) { + goto out; + } + + tvp_deleted = 1; + + /* Mark 'tcp' as being deleted due to a rename */ + tcp->c_flag |= C_RENAMED; + + /* + * Aggressively mark tvp/tcp for termination to ensure that we recover all blocks + * as quickly as possible. + */ + vnode_recycle(tvp); + } else { + struct doc_tombstone *ut; + ut = doc_tombstone_get(); + + // + // There is nothing at the destination. If the file being renamed is + // tracked, save a "tombstone" of the document_id. If the file is + // not a tracked file, then see if it needs to inherit a tombstone. 
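+		//
+		// (A "tombstone" here records the document_id of a tracked (UF_TRACKED)
+		// file across the unlink/rename steps of an atomic "safe save", so the
+		// replacement file can inherit the same document identity; see the
+		// doc_tombstone_* helpers used below.)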
+ // + // NOTE: we do not save a tombstone if the file being renamed begins + // with "atmp" which is done to work-around AutoCad's bizarre + // 5-step un-safe save behavior + // + if (fcp->c_bsdflags & UF_TRACKED) { + if (doc_tombstone_should_save(ut, fvp, fcnp)) { + doc_tombstone_save(fdvp, fvp, fcnp, hfs_get_document_id(fcp), + fcp->c_fileid); + + //printf("RENAME: (no dest): saving tombstone doc-id %lld @ %s (ino %d)\n", + // ut->t_lastop_document_id, ut->t_lastop_filename, fcp->c_desc.cd_cnid); + } else { + // intentionally do nothing + } + } else if ( ut->t_lastop_document_id != 0 + && tdvp == ut->t_lastop_parent + && vnode_vid(tdvp) == ut->t_lastop_parent_vid + && strcmp((char *)ut->t_lastop_filename, (char *)tcnp->cn_nameptr) == 0) { + + //printf("RENAME: %s (ino %d) inheriting doc-id %lld\n", tcnp->cn_nameptr, fcp->c_desc.cd_cnid, ut->t_lastop_document_id); + struct FndrExtendedFileInfo *fip = (struct FndrExtendedFileInfo *)((char *)&fcp->c_attr.ca_finderinfo + 16); + fcp->c_bsdflags |= UF_TRACKED; + fip->document_id = ut->t_lastop_document_id; + cat_update(hfsmp, &fcp->c_desc, &fcp->c_attr, NULL, NULL); + + doc_tombstone_clear(ut, &old_doc_vp); + } else if (ut->t_lastop_document_id && doc_tombstone_should_save(ut, fvp, fcnp) && doc_tombstone_should_save(ut, tvp, tcnp)) { + // no match, clear the tombstone + //printf("RENAME: clearing the tombstone %lld @ %s\n", ut->t_lastop_document_id, ut->t_lastop_filename); + doc_tombstone_clear(ut, NULL); + } + + } +skip_rm: + /* + * All done with tvp and fvp. + * + * We also jump to this point if there was no destination observed during lookup and namei. + * However, because only iocounts are held at the VFS layer, there is nothing preventing a + * competing thread from racing us and creating a file or dir at the destination of this rename + * operation. If this occurs, it may cause us to get a spurious EEXIST out of the cat_rename + * call below. To preserve rename's atomicity, we need to signal VFS to re-drive the + * namei/lookup and restart the rename operation. EEXIST is an allowable errno to be bubbled + * out of the rename syscall, but not for this reason, since it is a synonym errno for ENOTEMPTY. + * To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno + * will be swallowed and it will restart the operation. + */ + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + if (error == EEXIST) { + error = ERECYCLE; + } + goto out; + } + + /* Invalidate negative cache entries in the destination directory */ + if (tdcp->c_flag & C_NEG_ENTRIES) { + cache_purge_negatives(tdvp); + tdcp->c_flag &= ~C_NEG_ENTRIES; + } + + /* Update cnode's catalog descriptor */ + replace_desc(fcp, &out_desc); + fcp->c_parentcnid = tdcp->c_fileid; + fcp->c_hint = 0; + + /* + * Now indicate this cnode needs to have date-added written to the + * finderinfo, but only if moving to a different directory, or if + * it doesn't already have it. + */ + if (fdvp != tdvp || !ISSET(fcp->c_attr.ca_recflags, kHFSHasDateAddedMask)) + fcp->c_flag |= C_NEEDS_DATEADDED; + + (void) hfs_update (fvp, 0); + + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE, + (fdcp->c_cnid == kHFSRootFolderID)); + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_MKDIR : VOL_MKFILE, + (tdcp->c_cnid == kHFSRootFolderID)); + + /* Update both parent directories. 
*/ + if (fdvp != tdvp) { + if (vnode_isdir(fvp)) { + /* If the source directory has directory hard link + * descendants, set the kHFSHasChildLinkBit in the + * destination parent hierarchy + */ + if ((fcp->c_attr.ca_recflags & kHFSHasChildLinkMask) && + !(tdcp->c_attr.ca_recflags & kHFSHasChildLinkMask)) { + + tdcp->c_attr.ca_recflags |= kHFSHasChildLinkMask; + + error = cat_set_childlinkbit(hfsmp, tdcp->c_parentcnid); + if (error) { + printf ("hfs_vnop_rename: error updating parent chain for %u\n", tdcp->c_cnid); + error = 0; + } + } + INC_FOLDERCOUNT(hfsmp, tdcp->c_attr); + DEC_FOLDERCOUNT(hfsmp, fdcp->c_attr); + } + tdcp->c_entries++; + tdcp->c_dirchangecnt++; + tdcp->c_flag |= C_MODIFIED; + hfs_incr_gencount(tdcp); + + if (fdcp->c_entries > 0) + fdcp->c_entries--; + fdcp->c_dirchangecnt++; + fdcp->c_flag |= C_MODIFIED; + fdcp->c_touch_chgtime = TRUE; + fdcp->c_touch_modtime = TRUE; + + if (ISSET(fcp->c_flag, C_HARDLINK)) { + hfs_relorigin(fcp, fdcp->c_fileid); + if (fdcp->c_fileid != fdcp->c_cnid) + hfs_relorigin(fcp, fdcp->c_cnid); + } + + (void) hfs_update(fdvp, 0); + } + hfs_incr_gencount(fdcp); + + tdcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ + tdcp->c_touch_chgtime = TRUE; + tdcp->c_touch_modtime = TRUE; + + (void) hfs_update(tdvp, 0); + + /* Update the vnode's name now that the rename has completed. */ + vnode_update_identity(fvp, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, + tcnp->cn_hash, (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + + /* + * At this point, we may have a resource fork vnode attached to the + * 'from' vnode. If it exists, we will want to update its name, because + * it contains the old name + _PATH_RSRCFORKSPEC. ("/..namedfork/rsrc"). + * + * Note that the only thing we need to update here is the name attached to + * the vnode, since a resource fork vnode does not have a separate resource + * cnode -- it's still 'fcp'. + */ + if (fcp->c_rsrc_vp) { + char* rsrc_path = NULL; + int len; + + /* Create a new temporary buffer that's going to hold the new name */ + rsrc_path = hfs_malloc(MAXPATHLEN); + len = snprintf (rsrc_path, MAXPATHLEN, "%s%s", tcnp->cn_nameptr, _PATH_RSRCFORKSPEC); + len = MIN(len, MAXPATHLEN); + + /* + * vnode_update_identity will do the following for us: + * 1) release reference on the existing rsrc vnode's name. + * 2) copy/insert new name into the name cache + * 3) attach the new name to the resource vnode + * 4) update the vnode's vid + */ + vnode_update_identity (fcp->c_rsrc_vp, fvp, rsrc_path, len, 0, (VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE)); + + /* Free the memory associated with the resource fork's name */ + hfs_free(rsrc_path, MAXPATHLEN); + } +out: + if (got_cookie) { + cat_postflight(hfsmp, &cookie, p); + } + if (started_tr) { + hfs_end_transaction(hfsmp); + } + + fdcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&fdcp->c_flag); + if (fdvp != tdvp) { + tdcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&tdcp->c_flag); + } + + const ino64_t file_id = fcp->c_fileid; + + hfs_unlockfour(fdcp, fcp, tdcp, tcp); + + if (took_trunc_lock) { + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); + } + + /* Now vnode_put the resource forks vnodes if necessary */ + if (tvp_rsrc) { + vnode_put(tvp_rsrc); + tvp_rsrc = NULL; + } + + /* After tvp is removed the only acceptable error is EIO */ + if (error && tvp_deleted) + error = EIO; + + /* If we want to reintroduce notifications for renames, this is the + place to do it. 
*/ + + if (old_doc_vp) { + cnode_t *ocp = VTOC(old_doc_vp); + hfs_lock_always(ocp, HFS_EXCLUSIVE_LOCK); + struct FndrExtendedFileInfo *ofip = (struct FndrExtendedFileInfo *)((char *)&ocp->c_attr.ca_finderinfo + 16); + + const uint32_t doc_id = ofip->document_id; + const ino64_t old_file_id = ocp->c_fileid; + + // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid); + ofip->document_id = 0; + ocp->c_bsdflags &= ~UF_TRACKED; + ocp->c_flag |= C_MODIFIED; + + hfs_unlock(ocp); + vnode_put(old_doc_vp); + + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, old_file_id, // src inode # + FSE_ARG_INO, file_id, // dst inode # + FSE_ARG_INT32, doc_id, + FSE_ARG_DONE); + } + + return (error); +} + + +/* + * Make a directory. + */ +int +hfs_vnop_mkdir(struct vnop_mkdir_args *ap) +{ + /***** HACK ALERT ********/ + ap->a_cnp->cn_flags |= MAKEENTRY; + return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); +} + + +/* + * Create a symbolic link. + */ +int +hfs_vnop_symlink(struct vnop_symlink_args *ap) +{ + struct vnode **vpp = ap->a_vpp; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = NULL; + struct cnode *cp = NULL; + struct hfsmount *hfsmp; + struct filefork *fp; + struct buf *bp = NULL; + char *datap; + int started_tr = 0; + u_int32_t len; + int error; + + /* HFS standard disks don't support symbolic links */ + if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) + return (ENOTSUP); + + /* Check for empty target name */ + if (ap->a_target[0] == 0) + return (EINVAL); + + hfsmp = VTOHFS(dvp); + + len = strlen(ap->a_target); + if (len > MAXPATHLEN) + return (ENAMETOOLONG); + + /* Check for free space */ + if (((u_int64_t)hfs_freeblks(hfsmp, 0) * (u_int64_t)hfsmp->blockSize) < len) { + return (ENOSPC); + } + + /* Create the vnode */ + ap->a_vap->va_mode |= S_IFLNK; + if ((error = hfs_makenode(dvp, vpp, ap->a_cnp, ap->a_vap, ap->a_context))) { + goto out; + } + vp = *vpp; + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + goto out; + } + cp = VTOC(vp); + fp = VTOF(vp); + + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + goto out; + } + +#if QUOTA + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; + } + started_tr = 1; + + /* + * Allocate space for the link. + * + * Since we're already inside a transaction, + * + * Don't need truncate lock since a symlink is treated as a system file. + */ + error = hfs_truncate(vp, len, IO_NOZEROFILL, 0, ap->a_context); + + /* On errors, remove the symlink file */ + if (error) { + /* + * End the transaction so we don't re-take the cnode lock + * below while inside a transaction (lock order violation). 
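+		 *
+		 * (The code below re-establishes the proper order: end the transaction,
+		 * drop the cnode lock, take the truncate lock, re-take the cnode lock,
+		 * and only then start a new transaction before calling hfs_removefile().)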
+ */ + hfs_end_transaction(hfsmp); + + /* hfs_removefile() requires holding the truncate lock */ + hfs_unlock(cp); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + if (hfs_start_transaction(hfsmp) != 0) { + started_tr = 0; + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + goto out; + } + + (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + goto out; + } + + /* Write the link to disk */ + bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, hfsmp->hfs_physical_block_size), + 0, 0, BLK_META); + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); + } + datap = (char *)buf_dataptr(bp); + bzero(datap, buf_size(bp)); + bcopy(ap->a_target, datap, len); + + if (hfsmp->jnl) { + journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL); + } else { + buf_bawrite(bp); + } +out: + if (started_tr) + hfs_end_transaction(hfsmp); + if ((cp != NULL) && (vp != NULL)) { + hfs_unlock(cp); + } + if (error) { + if (vp) { + vnode_put(vp); + } + *vpp = NULL; + } + return (error); +} + + +/* structures to hold a "." or ".." directory entry */ +struct hfs_stddotentry { + u_int32_t d_fileno; /* unique file number */ + u_int16_t d_reclen; /* length of this structure */ + u_int8_t d_type; /* dirent file type */ + u_int8_t d_namlen; /* len of filename */ + char d_name[4]; /* "." or ".." */ +}; + +struct hfs_extdotentry { + u_int64_t d_fileno; /* unique file number */ + u_int64_t d_seekoff; /* seek offset (optional, used by servers) */ + u_int16_t d_reclen; /* length of this structure */ + u_int16_t d_namlen; /* len of filename */ + u_int8_t d_type; /* dirent file type */ + u_char d_name[3]; /* "." or ".." */ +}; + +typedef union { + struct hfs_stddotentry std; + struct hfs_extdotentry ext; +} hfs_dotentry_t; + +/* + * hfs_vnop_readdir reads directory entries into the buffer pointed + * to by uio, in a filesystem independent format. Up to uio_resid + * bytes of data can be transferred. The data in the buffer is a + * series of packed dirent structures where each one contains the + * following entries: + * + * u_int32_t d_fileno; // file number of entry + * u_int16_t d_reclen; // length of this record + * u_int8_t d_type; // file type + * u_int8_t d_namlen; // length of string in d_name + * char d_name[MAXNAMELEN+1]; // null terminated file name + * + * The current position (uio_offset) refers to the next block of + * entries. The offset can only be set to a value previously + * returned by hfs_vnop_readdir or zero. This offset does not have + * to match the number of bytes returned (in uio_resid). + * + * In fact, the offset used by HFS is essentially an index (26 bits) + * with a tag (6 bits). The tag is for associating the next request + * with the current request. This enables us to have multiple threads + * reading the directory while the directory is also being modified. + * + * Each tag/index pair is tied to a unique directory hint. The hint + * contains information (filename) needed to build the catalog b-tree + * key for finding the next set of entries. + * + * If the directory is marked as deleted-but-in-use (cp->c_flag & C_DELETED), + * do NOT synthesize entries for "." and "..". 
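+ *
+ * Sketch of the offset layout used below (HFS_INDEX_MASK covers the low
+ * index bits, 26 per the note above; the high bits carry the hint tag):
+ *
+ *     index = (offset & HFS_INDEX_MASK) - 2;   -- slots 0 and 1 are "." and ".."
+ *     tag   =  offset & ~HFS_INDEX_MASK;       -- ties the request to a directory hint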
+ */ +int +hfs_vnop_readdir(struct vnop_readdir_args *ap) +{ + struct vnode *vp = ap->a_vp; + uio_t uio = ap->a_uio; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + directoryhint_t *dirhint = NULL; + directoryhint_t localhint; + off_t offset; + off_t startoffset; + int error = 0; + int eofflag = 0; + user_addr_t user_start = 0; + user_size_t user_len = 0; + user_size_t user_original_resid = 0; + int index; + unsigned int tag; + int items; + int lockflags; + int extended; + int nfs_cookies; + cnid_t cnid_hint = 0; + int bump_valence = 0; + + items = 0; + startoffset = offset = uio_offset(uio); + extended = (ap->a_flags & VNODE_READDIR_EXTENDED); + nfs_cookies = extended && (ap->a_flags & VNODE_READDIR_REQSEEKOFF); + + /* Sanity check the uio data. */ + if (uio_iovcnt(uio) > 1) + return (EINVAL); + + if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) { + int compressed = hfs_file_is_compressed(VTOC(vp), 0); /* 0 == take the cnode lock */ + if (VTOCMP(vp) != NULL && !compressed) { + error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP); + if (error) { + return error; + } + } + } + + // + // We have to lock the user's buffer here so that we won't + // fault on it after we've acquired a shared lock on the + // catalog file. The issue is that you can get a 3-way + // deadlock if someone else starts a transaction and then + // tries to lock the catalog file but can't because we're + // here and we can't service our page fault because VM is + // blocked trying to start a transaction as a result of + // trying to free up pages for our page fault. It's messy + // but it does happen on dual-processors that are paging + // heavily (see radar 3082639 for more info). By locking + // the buffer up-front we prevent ourselves from faulting + // while holding the shared catalog file lock. + // + // Fortunately this and hfs_search() are the only two places + // currently (10/30/02) that can fault on user data with a + // shared lock on the catalog file. + // + if (hfsmp->jnl && uio_isuserspace(uio)) { + user_start = uio_curriovbase(uio); + user_len = uio_curriovlen(uio); + + /* Bounds check the user buffer */ + if (user_len > (256 * 1024)) { + /* only allow the user to wire down at most 256k */ + user_len = (256 * 1024); + user_original_resid = uio_resid(uio); + uio_setresid (uio, (user_ssize_t)(256 * 1024)); + } + + if ((error = vslock(user_start, user_len)) != 0) { + if (user_original_resid > 0) { + uio_setresid(uio, user_original_resid); + user_original_resid = 0; + } + return error; + } + } + + /* Note that the dirhint calls require an exclusive lock. */ + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + if (user_start) { + if (user_original_resid > 0) { + uio_setresid(uio, user_original_resid); + user_original_resid = 0; + } + vsunlock(user_start, user_len, TRUE); + } + return error; + } + + /* Pick up cnid hint (if any). */ + if (nfs_cookies) { + cnid_hint = (cnid_t)(uio_offset(uio) >> 32); + uio_setoffset(uio, uio_offset(uio) & 0x00000000ffffffffLL); + if (cnid_hint == INT_MAX) { /* searching pass the last item */ + eofflag = 1; + goto out; + } + } + /* + * Synthesize entries for "." and "..", unless the directory has + * been deleted, but not closed yet (lazy delete in progress). + */ + if (offset == 0 && !(cp->c_flag & C_DELETED)) { + + size_t uiosize; + + /* + * We could use a union of the two types of dot entries (HFS / HFS+) + * but it makes static analysis of this code difficult. 
The problem is that + * the HFS standard dot entry is smaller than the HFS+ one, and we also ideally + * want the uiomove to operate on a two-element adjacent array. If we used the + * array of unions, we would have to do two separate uiomoves because the memory + * for the hfs standard dot entries would not be adjacent to one another. + * So just allocate the entries on the stack in separate cases. + */ + + if (extended) { + hfs_dotentry_t dotentry[2]; + + /* HFS Plus */ + struct hfs_extdotentry *entry = &dotentry[0].ext; + + entry->d_fileno = cp->c_cnid; + entry->d_reclen = sizeof(struct hfs_extdotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 1; + entry->d_name[0] = '.'; + entry->d_name[1] = '\0'; + entry->d_name[2] = '\0'; + entry->d_seekoff = 1; + + ++entry; + entry->d_fileno = cp->c_parentcnid; + entry->d_reclen = sizeof(struct hfs_extdotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 2; + entry->d_name[0] = '.'; + entry->d_name[1] = '.'; + entry->d_name[2] = '\0'; + entry->d_seekoff = 2; + uiosize = 2 * sizeof(struct hfs_extdotentry); + + if ((error = uiomove((caddr_t)dotentry, uiosize, uio))) { + goto out; + } + + } else { + struct hfs_stddotentry hfs_std_dotentries[2]; + + /* HFS Standard */ + struct hfs_stddotentry *entry = &hfs_std_dotentries[0]; + + entry->d_fileno = cp->c_cnid; + entry->d_reclen = sizeof(struct hfs_stddotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 1; + *(int *)&entry->d_name[0] = 0; + entry->d_name[0] = '.'; + + ++entry; + entry->d_fileno = cp->c_parentcnid; + entry->d_reclen = sizeof(struct hfs_stddotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 2; + *(int *)&entry->d_name[0] = 0; + entry->d_name[0] = '.'; + entry->d_name[1] = '.'; + uiosize = 2 * sizeof(struct hfs_stddotentry); + + if ((error = uiomove((caddr_t)hfs_std_dotentries, uiosize, uio))) { + goto out; + } + } + + offset += 2; + } + + /* + * Intentionally avoid checking the valence here. If we + * have FS corruption that reports the valence is 0, even though it + * has contents, we might artificially skip over iterating + * this directory. + */ + + /* Convert offset into a catalog directory index. */ + index = (offset & HFS_INDEX_MASK) - 2; + tag = offset & ~HFS_INDEX_MASK; + + /* Lock catalog during cat_findname and cat_getdirentries. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* When called from NFS, try and resolve a cnid hint. */ + if (nfs_cookies && cnid_hint != 0) { + if (cat_findname(hfsmp, cnid_hint, &localhint.dh_desc) == 0) { + if ( localhint.dh_desc.cd_parentcnid == cp->c_fileid) { + localhint.dh_index = index - 1; + localhint.dh_time = 0; + bzero(&localhint.dh_link, sizeof(localhint.dh_link)); + dirhint = &localhint; /* don't forget to release the descriptor */ + } else { + cat_releasedesc(&localhint.dh_desc); + } + } + } + + /* Get a directory hint (cnode must be locked exclusive) */ + if (dirhint == NULL) { + dirhint = hfs_getdirhint(cp, ((index - 1) & HFS_INDEX_MASK) | tag, 0); + + /* Hide tag from catalog layer. */ + dirhint->dh_index &= HFS_INDEX_MASK; + if (dirhint->dh_index == HFS_INDEX_MASK) { + dirhint->dh_index = -1; + } + } + + if (index == 0) { + dirhint->dh_threadhint = cp->c_dirthreadhint; + } + else { + /* + * If we have a non-zero index, there is a possibility that during the last + * call to hfs_vnop_readdir we hit EOF for this directory. If that is the case + * then we don't want to return any new entries for the caller. Just return 0 + * items, mark the eofflag, and bail out. 
Because we won't have done any work, the + * code at the end of the function will release the dirhint for us. + * + * Don't forget to unlock the catalog lock on the way out, too. + */ + if (dirhint->dh_desc.cd_flags & CD_EOF) { + error = 0; + eofflag = 1; + uio_setoffset(uio, startoffset); + if (user_original_resid > 0) { + uio_setresid(uio, user_original_resid); + user_original_resid = 0; + } + hfs_systemfile_unlock (hfsmp, lockflags); + + goto seekoffcalc; + } + } + + /* Pack the buffer with dirent entries. */ + error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, ap->a_flags, &items, &eofflag); + + if (user_original_resid > 0) { + user_original_resid = user_original_resid - ((user_ssize_t)256*1024 - uio_resid(uio)); + uio_setresid(uio, user_original_resid); + user_original_resid = 0; + } + + if (index == 0 && error == 0) { + cp->c_dirthreadhint = dirhint->dh_threadhint; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error != 0) { + goto out; + } + + /* Get index to the next item */ + index += items; + + if (items >= (int)cp->c_entries) { + eofflag = 1; + } + + /* + * Detect valence FS corruption. + * + * We are holding the cnode lock exclusive, so there should not be + * anybody modifying the valence field of this cnode. If we enter + * this block, that means we observed filesystem corruption, because + * this directory reported a valence of 0, yet we found at least one + * item. In this case, we need to minimally self-heal this + * directory to prevent userland from tripping over a directory + * that appears empty (getattr of valence reports 0), but actually + * has contents. + * + * We'll force the cnode update at the end of the function after + * completing all of the normal getdirentries steps. + */ + if ((cp->c_entries == 0) && (items > 0)) { + /* disk corruption */ + cp->c_entries++; + /* Mark the cnode as dirty. */ + cp->c_flag |= C_MODIFIED; + printf("hfs_vnop_readdir: repairing valence to non-zero! \n"); + bump_valence++; + } + + + /* Convert catalog directory index back into an offset. */ + while (tag == 0) + tag = (++cp->c_dirhinttag) << HFS_INDEX_BITS; + uio_setoffset(uio, (index + 2) | tag); + dirhint->dh_index |= tag; + +seekoffcalc: + cp->c_touch_acctime = TRUE; + + if (ap->a_numdirent) { + if (startoffset == 0) + items += 2; + *ap->a_numdirent = items; + } + +out: + if (user_start) { + if (user_original_resid > 0) { + uio_setresid(uio, user_original_resid); + user_original_resid = 0; + } + vsunlock(user_start, user_len, TRUE); + } + /* If we didn't do anything then go ahead and dump the hint. */ + if ((dirhint != NULL) && + (dirhint != &localhint) && + (uio_offset(uio) == startoffset)) { + hfs_reldirhint(cp, dirhint); + eofflag = 1; + } + if (ap->a_eofflag) { + *ap->a_eofflag = eofflag; + } + if (dirhint == &localhint) { + cat_releasedesc(&localhint.dh_desc); + } + + if (bump_valence) { + /* force the update before dropping the cnode lock*/ + hfs_update(vp, 0); + } + + hfs_unlock(cp); + + return (error); +} + + +/* + * Read contents of a symbolic link. 
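+ *
+ * (The link target is read from disk once, cached in fp->ff_symlinkptr, and
+ * served from that cache on subsequent reads; see the body below.)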
+ */
+int
+hfs_vnop_readlink(struct vnop_readlink_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp;
+	struct filefork *fp;
+	int error;
+
+	if (!vnode_islnk(vp))
+		return (EINVAL);
+
+	if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
+		return (error);
+	cp = VTOC(vp);
+	fp = VTOF(vp);
+
+	/* Zero length sym links are not allowed */
+	if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) {
+		error = EINVAL;
+		goto exit;
+	}
+
+	/* Cache the path so we don't waste buffer cache resources */
+	if (fp->ff_symlinkptr == NULL) {
+		struct buf *bp = NULL;
+
+		fp->ff_symlinkptr = hfs_malloc(fp->ff_size);
+		error = (int)buf_meta_bread(vp, (daddr64_t)0,
+		                            roundup((int)fp->ff_size, VTOHFS(vp)->hfs_physical_block_size),
+		                            vfs_context_ucred(ap->a_context), &bp);
+		if (error) {
+			if (bp)
+				buf_brelse(bp);
+			if (fp->ff_symlinkptr) {
+				hfs_free(fp->ff_symlinkptr, fp->ff_size);
+				fp->ff_symlinkptr = NULL;
+			}
+			goto exit;
+		}
+		bcopy((char *)buf_dataptr(bp), fp->ff_symlinkptr, (size_t)fp->ff_size);
+
+		if (VTOHFS(vp)->jnl && (buf_flags(bp) & B_LOCKED) == 0) {
+			buf_markinvalid(bp);	/* data no longer needed */
+		}
+		buf_brelse(bp);
+	}
+	error = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio);
+
+	/*
+	 * Keep track of blocks read.
+	 */
+	if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (error == 0)) {
+
+		/*
+		 * If this file hasn't been seen since the start of
+		 * the current sampling period then start over.
+		 */
+		if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
+			VTOF(vp)->ff_bytesread = fp->ff_size;
+		else
+			VTOF(vp)->ff_bytesread += fp->ff_size;
+
+		// if (VTOF(vp)->ff_bytesread > fp->ff_size)
+		//	cp->c_touch_acctime = TRUE;
+	}
+
+exit:
+	hfs_unlock(cp);
+	return (error);
+}
+
+
+/*
+ * Get configurable pathname variables.
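+ *
+ * Illustrative userland usage (hypothetical mount point, not part of this
+ * file):
+ *
+ *     long name_max = pathconf("/Volumes/MyHFS", _PC_NAME_MAX);   -- 255 on HFS+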
+ */ +int +hfs_vnop_pathconf(struct vnop_pathconf_args *ap) +{ + +#if CONFIG_HFS_STD + int std_hfs = (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD); +#endif + + switch (ap->a_name) { + case _PC_LINK_MAX: +#if CONFIG_HFS_STD + if (std_hfs) { + *ap->a_retval = 1; + } else +#endif + { + *ap->a_retval = HFS_LINK_MAX; + } + break; + case _PC_NAME_MAX: +#if CONFIG_HFS_STD + if (std_hfs) { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } else +#endif + { + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } + break; + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; /* 1024 */ + break; + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 200112; /* _POSIX_CHOWN_RESTRICTED */ + break; + case _PC_NO_TRUNC: + *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ + break; + case _PC_NAME_CHARS_MAX: +#if CONFIG_HFS_STD + if (std_hfs) { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } else +#endif + { + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } + break; + case _PC_CASE_SENSITIVE: + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE) + *ap->a_retval = 1; + else + *ap->a_retval = 0; + break; + case _PC_CASE_PRESERVING: + *ap->a_retval = 1; + break; + case _PC_FILESIZEBITS: + /* number of bits to store max file size */ +#if CONFIG_HFS_STD + if (std_hfs) { + *ap->a_retval = 32; + } else +#endif + { + *ap->a_retval = 64; + } + break; + case _PC_XATTR_SIZE_BITS: + /* Number of bits to store maximum extended attribute size */ + *ap->a_retval = HFS_XATTR_SIZE_BITS; + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * Prepares a fork for cat_update by making sure ff_size and ff_blocks + * are no bigger than the valid data on disk thus reducing the chance + * of exposing uninitialised data in the event of a non clean unmount. + * fork_buf is where to put the temporary copy if required. (It can + * be inside pfork.) + */ +const struct cat_fork * +hfs_prepare_fork_for_update(filefork_t *ff, + const struct cat_fork *cf, + struct cat_fork *cf_buf, + uint32_t block_size) +{ + if (!ff) + return NULL; + + if (!cf) + cf = &ff->ff_data; + if (!cf_buf) + cf_buf = &ff->ff_data; + + off_t max_size = ff->ff_size; + + // Check first invalid range + if (!TAILQ_EMPTY(&ff->ff_invalidranges)) + max_size = TAILQ_FIRST(&ff->ff_invalidranges)->rl_start; + + if (!ff->ff_unallocblocks && ff->ff_size <= max_size) + return cf; // Nothing to do + + if (ff->ff_blocks < ff->ff_unallocblocks) { + panic("hfs: ff_blocks %d is less than unalloc blocks %d\n", + ff->ff_blocks, ff->ff_unallocblocks); + } + + struct cat_fork *out = cf_buf; + + if (out != cf) + bcopy(cf, out, sizeof(*cf)); + + // Adjust cf_blocks for cf_vblocks + out->cf_blocks -= out->cf_vblocks; + + /* + * Here we trim the size with the updated cf_blocks. This is + * probably unnecessary now because the invalid ranges should + * catch this (but that wasn't always the case). + */ + off_t alloc_bytes = hfs_blk_to_bytes(out->cf_blocks, block_size); + if (out->cf_size > alloc_bytes) + out->cf_size = alloc_bytes; + + // Trim cf_size to first invalid range + if (out->cf_size > max_size) + out->cf_size = max_size; + + return out; +} + +/* + * Update a cnode's on-disk metadata. + * + * The cnode must be locked exclusive. See declaration for possible + * options. 
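+ *
+ * (Summary of the logic below: "minor" modifications (C_MINOR_MOD, e.g.
+ * timestamp-only changes) are only written out here if they can piggy-back
+ * on the journal transaction that already covers this cnode (c_update_txn),
+ * or if HFS_UPDATE_FORCE is passed; otherwise they are skipped and left for
+ * a later update.)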
+ */ +int +hfs_update(struct vnode *vp, int options) +{ + struct cnode *cp = VTOC(vp); + struct proc *p; + const struct cat_fork *dataforkp = NULL; + const struct cat_fork *rsrcforkp = NULL; + struct cat_fork datafork; + struct cat_fork rsrcfork; + struct hfsmount *hfsmp; + int lockflags; + int error; + uint32_t tstate = 0; + + if (ISSET(cp->c_flag, C_NOEXISTS)) + return 0; + + p = current_proc(); + hfsmp = VTOHFS(vp); + + if (((vnode_issystem(vp) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))) || + hfsmp->hfs_catalog_vp == NULL){ + return (0); + } + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) { + CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED); + cp->c_touch_acctime = 0; + cp->c_touch_chgtime = 0; + cp->c_touch_modtime = 0; + return (0); + } + if (kdebug_enable) { + if (cp->c_touch_acctime || cp->c_atime != cp->c_attr.ca_atimeondisk) + tstate |= DBG_HFS_UPDATE_ACCTIME; + if (cp->c_touch_modtime) + tstate |= DBG_HFS_UPDATE_MODTIME; + if (cp->c_touch_chgtime) + tstate |= DBG_HFS_UPDATE_CHGTIME; + + if (cp->c_flag & C_MODIFIED) + tstate |= DBG_HFS_UPDATE_MODIFIED; + if (ISSET(options, HFS_UPDATE_FORCE)) + tstate |= DBG_HFS_UPDATE_FORCE; + if (cp->c_flag & C_NEEDS_DATEADDED) + tstate |= DBG_HFS_UPDATE_DATEADDED; + if (cp->c_flag & C_MINOR_MOD) + tstate |= DBG_HFS_UPDATE_MINOR; + } + hfs_touchtimes(hfsmp, cp); + + if (!ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD) + && !hfs_should_save_atime(cp)) { + // Nothing to update + return 0; + } + + KDBG(HFSDBG_UPDATE | DBG_FUNC_START, kdebug_vnode(vp), tstate); + + bool check_txn = false; + + if (!ISSET(options, HFS_UPDATE_FORCE) && !ISSET(cp->c_flag, C_MODIFIED)) { + /* + * This must be a minor modification. If the current + * transaction already has an update for this node, then we + * bundle in the modification. + */ + if (hfsmp->jnl + && journal_current_txn(hfsmp->jnl) == cp->c_update_txn) { + check_txn = true; + } else { + tstate |= DBG_HFS_UPDATE_SKIPPED; + error = 0; + goto exit; + } + } + + if ((error = hfs_start_transaction(hfsmp)) != 0) + goto exit; + + if (check_txn + && journal_current_txn(hfsmp->jnl) != cp->c_update_txn) { + hfs_end_transaction(hfsmp); + tstate |= DBG_HFS_UPDATE_SKIPPED; + error = 0; + goto exit; + } + + if (cp->c_datafork) + dataforkp = &cp->c_datafork->ff_data; + if (cp->c_rsrcfork) + rsrcforkp = &cp->c_rsrcfork->ff_data; + + /* + * Modify the values passed to cat_update based on whether or not + * the file has invalid ranges or borrowed blocks. + */ + dataforkp = hfs_prepare_fork_for_update(cp->c_datafork, NULL, &datafork, hfsmp->blockSize); + rsrcforkp = hfs_prepare_fork_for_update(cp->c_rsrcfork, NULL, &rsrcfork, hfsmp->blockSize); + + if (__builtin_expect(kdebug_enable & KDEBUG_TRACE, 0)) { + long dbg_parms[NUMPARMS]; + int dbg_namelen; + + dbg_namelen = NUMPARMS * sizeof(long); + vn_getpath(vp, (char *)dbg_parms, &dbg_namelen); + + if (dbg_namelen < (int)sizeof(dbg_parms)) + memset((char *)dbg_parms + dbg_namelen, 0, sizeof(dbg_parms) - dbg_namelen); + + kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); + } + + /* + * Lock the Catalog b-tree file. 
+ */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+
+	error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp);
+
+	if (hfsmp->jnl)
+		cp->c_update_txn = journal_current_txn(hfsmp->jnl);
+
+	hfs_systemfile_unlock(hfsmp, lockflags);
+
+	CLR(cp->c_flag, C_MODIFIED | C_MINOR_MOD);
+
+	hfs_end_transaction(hfsmp);
+
+exit:
+
+	KDBG(HFSDBG_UPDATE | DBG_FUNC_END, kdebug_vnode(vp), tstate, error);
+
+	return error;
+}
+
+/*
+ * Allocate a new node
+ */
+int
+hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
+             struct vnode_attr *vap, vfs_context_t ctx)
+{
+	struct cnode *cp = NULL;
+	struct cnode *dcp = NULL;
+	struct vnode *tvp;
+	struct hfsmount *hfsmp;
+	struct cat_desc in_desc, out_desc;
+	struct cat_attr attr;
+	struct timeval tv;
+	int lockflags;
+	int error, started_tr = 0;
+	enum vtype vnodetype;
+	int mode;
+	int newvnode_flags = 0;
+	u_int32_t gnv_flags = 0;
+	int protectable_target = 0;
+	int nocache = 0;
+	vnode_t old_doc_vp = NULL;
+
+#if CONFIG_PROTECT
+	struct cprotect *entry = NULL;
+	int32_t cp_class = -1;
+
+	/*
+	 * By default, it's OK for AKS to override our target class preferences.
+	 */
+	uint32_t keywrap_flags = CP_KEYWRAP_DIFFCLASS;
+
+	if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) {
+		cp_class = (int32_t)vap->va_dataprotect_class;
+		/*
+		 * Since the user specifically requested this target class be used,
+		 * we want to fail this creation operation if we cannot wrap to their
+		 * target class. The CP_KEYWRAP_DIFFCLASS bit says that it is OK to
+		 * use a different class than the one specified, so we turn that off
+		 * now.
+		 */
+		keywrap_flags &= ~CP_KEYWRAP_DIFFCLASS;
+	}
+	int protected_mount = 0;
+#endif
+
+
+	if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)))
+		return (error);
+
+	/* set the cnode pointer only after successfully acquiring lock */
+	dcp = VTOC(dvp);
+
+	/* Don't allow creation of new entries in open-unlinked directories */
+	if ((error = hfs_checkdeleted(dcp))) {
+		hfs_unlock(dcp);
+		return error;
+	}
+
+	dcp->c_flag |= C_DIR_MODIFICATION;
+
+	hfsmp = VTOHFS(dvp);
+
+	*vpp = NULL;
+	tvp = NULL;
+	out_desc.cd_flags = 0;
+	out_desc.cd_nameptr = NULL;
+
+	vnodetype = vap->va_type;
+	if (vnodetype == VNON)
+		vnodetype = VREG;
+	mode = MAKEIMODE(vnodetype, vap->va_mode);
+
+	if (S_ISDIR (mode) || S_ISREG (mode)) {
+		protectable_target = 1;
+	}
+
+
+	/* Check if we're out of usable disk space.
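+	 * (Per the check below, this preflight is skipped for the super-user:
+	 * vfs_context_suser() returns 0 for root, so root may still create
+	 * entries when hfs_freeblks() reports no usable blocks.)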
*/ + if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) { + error = ENOSPC; + goto exit; + } + + microtime(&tv); + + /* Setup the default attributes */ + bzero(&attr, sizeof(attr)); + attr.ca_mode = mode; + attr.ca_linkcount = 1; + if (VATTR_IS_ACTIVE(vap, va_rdev)) { + attr.ca_rdev = vap->va_rdev; + } + if (VATTR_IS_ACTIVE(vap, va_create_time)) { + VATTR_SET_SUPPORTED(vap, va_create_time); + attr.ca_itime = vap->va_create_time.tv_sec; + } else { + attr.ca_itime = tv.tv_sec; + } +#if CONFIG_HFS_STD + if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { + attr.ca_itime += 3600; /* Same as what hfs_update does */ + } +#endif + attr.ca_atime = attr.ca_ctime = attr.ca_mtime = attr.ca_itime; + attr.ca_atimeondisk = attr.ca_atime; + if (VATTR_IS_ACTIVE(vap, va_flags)) { + VATTR_SET_SUPPORTED(vap, va_flags); + attr.ca_flags = vap->va_flags; + } + + /* + * HFS+ only: all files get ThreadExists + * HFSX only: dirs get HasFolderCount + */ +#if CONFIG_HFS_STD + if (!(hfsmp->hfs_flags & HFS_STANDARD)) +#endif + { + if (vnodetype == VDIR) { + if (hfsmp->hfs_flags & HFS_FOLDERCOUNT) + attr.ca_recflags = kHFSHasFolderCountMask; + } else { + attr.ca_recflags = kHFSThreadExistsMask; + } + } + +#if CONFIG_PROTECT + if (cp_fs_protected(hfsmp->hfs_mp)) { + protected_mount = 1; + } + /* + * On a content-protected HFS+/HFSX filesystem, files and directories + * cannot be created without atomically setting/creating the EA that + * contains the protection class metadata and keys at the same time, in + * the same transaction. As a result, pre-set the "EAs exist" flag + * on the cat_attr for protectable catalog record creations. This will + * cause the cnode creation routine in hfs_getnewvnode to mark the cnode + * as having EAs. + */ + if ((protected_mount) && (protectable_target)) { + attr.ca_recflags |= kHFSHasAttributesMask; + /* delay entering in the namecache */ + nocache = 1; + } +#endif + + + /* + * Add the date added to the item. See above, as + * all of the dates are set to the itime. + */ + hfs_write_dateadded (&attr, attr.ca_atime); + + /* Initialize the gen counter to 1 */ + hfs_write_gencount(&attr, (uint32_t)1); + + attr.ca_uid = vap->va_uid; + attr.ca_gid = vap->va_gid; + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + +#if QUOTA + /* check to see if this node's creation would cause us to go over + * quota. If so, abort this operation. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + if ((error = hfs_quotacheck(hfsmp, 1, attr.ca_uid, attr.ca_gid, + vfs_context_ucred(ctx)))) { + goto exit; + } + } +#endif + + + /* Tag symlinks with a type and creator. */ + if (vnodetype == VLNK) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)&attr.ca_finderinfo; + fip->fdType = SWAP_BE32(kSymLinkFileType); + fip->fdCreator = SWAP_BE32(kSymLinkCreator); + } + + /* Setup the descriptor */ + in_desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; + in_desc.cd_namelen = cnp->cn_namelen; + in_desc.cd_parentcnid = dcp->c_fileid; + in_desc.cd_flags = S_ISDIR(mode) ? CD_ISDIR : 0; + in_desc.cd_hint = dcp->c_childhint; + in_desc.cd_encoding = 0; + +#if CONFIG_PROTECT + /* + * To preserve file creation atomicity with regards to the content protection EA, + * we must create the file in the catalog and then write out its EA in the same + * transaction. 
+ * + * We only denote the target class in this EA; key generation is not completed + * until the file has been inserted into the catalog and will be done + * in a separate transaction. + */ + if ((protected_mount) && (protectable_target)) { + error = cp_setup_newentry(hfsmp, dcp, cp_class, attr.ca_mode, &entry); + if (error) { + goto exit; + } + } +#endif + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto exit; + } + started_tr = 1; + + // have to also lock the attribute file because cat_create() needs + // to check that any fileID it wants to use does not have orphaned + // attributes in it. + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + cnid_t new_id; + + /* Reserve some space in the Catalog file. */ + if ((error = cat_preflight(hfsmp, CAT_CREATE, NULL, 0))) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto exit; + } + + if ((error = cat_acquire_cnid(hfsmp, &new_id))) { + hfs_systemfile_unlock (hfsmp, lockflags); + goto exit; + } + + error = cat_create(hfsmp, new_id, &in_desc, &attr, &out_desc); + if (error == 0) { + /* Update the parent directory */ + dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ + dcp->c_entries++; + + if (vnodetype == VDIR) { + INC_FOLDERCOUNT(hfsmp, dcp->c_attr); + } + dcp->c_dirchangecnt++; + hfs_incr_gencount(dcp); + + dcp->c_touch_chgtime = dcp->c_touch_modtime = true; + dcp->c_flag |= C_MODIFIED; + + hfs_update(dcp->c_vp, 0); + +#if CONFIG_PROTECT + /* + * If we are creating a content protected file, now is when + * we create the EA. We must create it in the same transaction + * that creates the file. We can also guarantee that the file + * MUST exist because we are still holding the catalog lock + * at this point. + */ + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + error = cp_setxattr (NULL, entry, hfsmp, attr.ca_fileid, XATTR_CREATE); + + if (error) { + int delete_err; + /* + * If we fail the EA creation, then we need to delete the file. + * Luckily, we are still holding all of the right locks. + */ + delete_err = cat_delete (hfsmp, &out_desc, &attr); + if (delete_err == 0) { + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + dcp->c_dirchangecnt++; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + + /* Emit EINVAL if we fail to create EA*/ + error = EINVAL; + } + } +#endif + } + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) + goto exit; + + uint32_t txn = hfsmp->jnl ? journal_current_txn(hfsmp->jnl) : 0; + + /* Invalidate negative cache entries in the directory */ + if (dcp->c_flag & C_NEG_ENTRIES) { + cache_purge_negatives(dvp); + dcp->c_flag &= ~C_NEG_ENTRIES; + } + + hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE, + (dcp->c_cnid == kHFSRootFolderID)); + + // XXXdbg + // have to end the transaction here before we call hfs_getnewvnode() + // because that can cause us to try and reclaim a vnode on a different + // file system which could cause us to start a transaction which can + // deadlock with someone on that other file system (since we could be + // holding two transaction locks as well as various vnodes and we did + // not obtain the locks on them in the proper order). + // + // NOTE: this means that if the quota check fails or we have to update + // the change time on a block-special device that those changes + // will happen as part of independent transactions. 
+ // + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; + } + +#if CONFIG_PROTECT + /* + * At this point, we must have encountered success with writing the EA. + * Destroy our temporary cprotect (which had no keys). + */ + + if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { + cp_entry_destroy (hfsmp, entry); + entry = NULL; + } +#endif + gnv_flags |= GNV_CREATE; + if (nocache) { + gnv_flags |= GNV_NOCACHE; + } + + /* + * Create a vnode for the object just created. + * + * NOTE: Maintaining the cnode lock on the parent directory is important, + * as it prevents race conditions where other threads want to look up entries + * in the directory and/or add things as we are in the process of creating + * the vnode below. However, this has the potential for causing a + * double lock panic when dealing with shadow files on a HFS boot partition. + * The panic could occur if we are not cleaning up after ourselves properly + * when done with a shadow file or in the error cases. The error would occur if we + * try to create a new vnode, and then end up reclaiming another shadow vnode to + * create the new one. However, if everything is working properly, this should + * be a non-issue as we would never enter that reclaim codepath. + * + * The cnode is locked on successful return. + */ + error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, gnv_flags, &attr, + NULL, &tvp, &newvnode_flags); + if (error) + goto exit; + + cp = VTOC(tvp); + + cp->c_update_txn = txn; + + struct doc_tombstone *ut; + ut = doc_tombstone_get(); + if ( ut->t_lastop_document_id != 0 + && ut->t_lastop_parent == dvp + && ut->t_lastop_parent_vid == vnode_vid(dvp) + && strcmp((char *)ut->t_lastop_filename, (const char *)cp->c_desc.cd_nameptr) == 0) { + struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&cp->c_attr.ca_finderinfo + 16); + + //printf("CREATE: preserving doc-id %lld on %s\n", ut->t_lastop_document_id, ut->t_lastop_filename); + fip->document_id = (uint32_t)(ut->t_lastop_document_id & 0xffffffff); + + cp->c_bsdflags |= UF_TRACKED; + cp->c_flag |= C_MODIFIED; + + if ((error = hfs_start_transaction(hfsmp)) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); + } + + doc_tombstone_clear(ut, &old_doc_vp); + } else if (ut->t_lastop_document_id != 0) { + int len = cnp->cn_namelen; + if (len == 0) { + len = strlen(cnp->cn_nameptr); + } + + if (doc_tombstone_should_ignore_name(cnp->cn_nameptr, cnp->cn_namelen)) { + // printf("CREATE: not clearing tombstone because %s is a temp name.\n", cnp->cn_nameptr); + } else { + // Clear the tombstone because the thread is not recreating the same path + // printf("CREATE: clearing tombstone because %s is NOT a temp name.\n", cnp->cn_nameptr); + doc_tombstone_clear(ut, NULL); + } + } + + if ((hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) && (vnode_isfastdevicecandidate(dvp) && !vnode_isautocandidate(dvp))) { + + //printf("hfs: flagging %s (fileid: %d) as VFASTDEVCANDIDATE (dvp name: %s)\n", + // cnp->cn_nameptr ? cnp->cn_nameptr : "", + // cp->c_fileid, + // dvp->v_name ? dvp->v_name : "no-dir-name"); + + // + // On new files we set the FastDevCandidate flag so that + // any new blocks allocated to it will be pinned. 
+ // + cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask; + vnode_setfastdevicecandidate(tvp); + + // + // properly inherit auto-cached flags + // + if (vnode_isautocandidate(dvp)) { + cp->c_attr.ca_recflags |= kHFSAutoCandidateMask; + vnode_setautocandidate(tvp); + } + + + // + // We also want to add it to the hotfile adoption list so + // that it will eventually land in the hotfile btree + // + (void) hfs_addhotfile(tvp); + } + + *vpp = tvp; + +#if CONFIG_PROTECT + /* + * Now that we have a vnode-in-hand, generate keys for this namespace item. + * If we fail to create the keys, then attempt to delete the item from the + * namespace. If we can't delete the item, that's not desirable but also not fatal.. + * All of the places which deal with restoring/unwrapping keys must also be + * prepared to encounter an entry that does not have keys. + */ + if ((protectable_target) && (protected_mount)) { + struct cprotect *keyed_entry = NULL; + + if (cp->c_cpentry == NULL) { + panic ("hfs_makenode: no cpentry for cnode (%p)", cp); + } + + error = cp_generate_keys (hfsmp, cp, CP_CLASS(cp->c_cpentry->cp_pclass), keywrap_flags, &keyed_entry); + if (error == 0) { + /* + * Upon success, the keys were generated and written out. + * Update the cp pointer in the cnode. + */ + cp_replace_entry (hfsmp, cp, keyed_entry); + if (nocache) { + cache_enter (dvp, tvp, cnp); + } + } + else { + /* If key creation OR the setxattr failed, emit EPERM to userland */ + error = EPERM; + + /* + * Beware! This slightly violates the lock ordering for the + * cnode/vnode 'tvp'. Ordinarily, you must acquire the truncate lock + * which guards file size changes before acquiring the normal cnode lock + * and calling hfs_removefile on an item. + * + * However, in this case, we are still holding the directory lock so + * 'tvp' is not lookup-able and it was a newly created vnode so it + * cannot have any content yet. The only reason we are initiating + * the removefile is because we could not generate content protection keys + * for this namespace item. Note also that we pass a '1' in the allow_dirs + * argument for hfs_removefile because we may be creating a directory here. + * + * All this to say that while it is technically a violation it is + * impossible to race with another thread for this cnode so it is safe. + */ + int err = hfs_removefile (dvp, tvp, cnp, 0, 0, 1, NULL, 0); + if (err) { + printf("hfs_makenode: removefile failed (%d) for CP entry %p\n", err, tvp); + } + + /* Release the cnode lock and mark the vnode for termination */ + hfs_unlock (cp); + err = vnode_recycle (tvp); + if (err) { + printf("hfs_makenode: vnode_recycle failed (%d) for CP entry %p\n", err, tvp); + } + + /* Drop the iocount on the new vnode to force reclamation/recycling */ + vnode_put (tvp); + cp = NULL; + *vpp = NULL; + } + } +#endif + +#if QUOTA + /* + * Once we create this vnode, we need to initialize its quota data + * structures, if necessary. We know that it is OK to just go ahead and + * initialize because we've already validated earlier (through the hfs_quotacheck + * function) to see if creating this cnode/vnode would cause us to go over quota. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + if (cp) { + /* cp could have been zeroed earlier */ + (void) hfs_getinoquota(cp); + } + } +#endif + +exit: + cat_releasedesc(&out_desc); + +#if CONFIG_PROTECT + /* + * We may have jumped here in error-handling various situations above. + * If we haven't already dumped the temporary CP used to initialize + * the file atomically, then free it now. 
cp_entry_destroy should null + * out the pointer if it was called already. + */ + if (entry) { + cp_entry_destroy (hfsmp, entry); + entry = NULL; + } +#endif + + /* + * Make sure we release cnode lock on dcp. + */ + if (dcp) { + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + + hfs_unlock(dcp); + } + ino64_t file_id = 0; + if (error == 0 && cp != NULL) { + file_id = cp->c_fileid; + hfs_unlock(cp); + } + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; + } + + if (old_doc_vp) { + cnode_t *ocp = VTOC(old_doc_vp); + hfs_lock_always(ocp, HFS_EXCLUSIVE_LOCK); + struct FndrExtendedFileInfo *ofip = (struct FndrExtendedFileInfo *)((char *)&ocp->c_attr.ca_finderinfo + 16); + + const uint32_t doc_id = ofip->document_id; + const ino64_t old_file_id = ocp->c_fileid; + + // printf("clearing doc-id from ino %d\n", ocp->c_desc.cd_cnid); + ofip->document_id = 0; + ocp->c_bsdflags &= ~UF_TRACKED; + ocp->c_flag |= C_MODIFIED; + + hfs_unlock(ocp); + vnode_put(old_doc_vp); + + add_fsevent(FSE_DOCID_CHANGED, vfs_context_current(), + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, old_file_id, // src inode # + FSE_ARG_INO, file_id, // dst inode # + FSE_ARG_INT32, doc_id, + FSE_ARG_DONE); + } + + return (error); +} + + +/* + * hfs_vgetrsrc acquires a resource fork vnode corresponding to the + * cnode that is found in 'vp'. The cnode should be locked upon entry + * and will be returned locked, but it may be dropped temporarily. + * + * If the resource fork vnode does not exist, HFS will attempt to acquire an + * empty (uninitialized) vnode from VFS so as to avoid deadlocks with + * jetsam. If we let the normal getnewvnode code produce the vnode for us + * we would be doing so while holding the cnode lock of our cnode. + * + * On success, *rvpp wlll hold the resource fork vnode with an + * iocount. *Don't* forget the vnode_put. + */ +int +hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp) +{ + struct vnode *rvp = NULLVP; + struct vnode *empty_rvp = NULLVP; + struct vnode *dvp = NULLVP; + struct cnode *cp = VTOC(vp); + int error; + int vid; + + if (vnode_vtype(vp) == VDIR) { + return EINVAL; + } + +restart: + /* Attempt to use existing vnode */ + if ((rvp = cp->c_rsrc_vp)) { + vid = vnode_vid(rvp); + + // vnode_getwithvid can block so we need to drop the cnode lock + hfs_unlock(cp); + + error = vnode_getwithvid(rvp, vid); + + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + + /* + * When our lock was relinquished, the resource fork + * could have been recycled. Check for this and try + * again. + */ + if (error == ENOENT) + goto restart; + + if (error) { + const char * name = (const char *)VTOC(vp)->c_desc.cd_nameptr; + + if (name) + printf("hfs_vgetrsrc: couldn't get resource" + " fork for %s, vol=%s, err=%d\n", name, hfsmp->vcbVN, error); + return (error); + } + } else { + struct cat_fork rsrcfork; + struct componentname cn; + struct cat_desc *descptr = NULL; + struct cat_desc to_desc; + char delname[32]; + int lockflags; + int newvnode_flags = 0; + + /* + * In this case, we don't currently see a resource fork vnode attached + * to this cnode. In most cases, we were called from a read-only VNOP + * like getattr, so it should be safe to drop the cnode lock and then + * re-acquire it. + * + * Here, we drop the lock so that we can acquire an empty/husk + * vnode so that we don't deadlock against jetsam. + * + * It does not currently appear possible to hold the truncate lock via + * FS re-entrancy when we get to this point. 
(8/2014) + */ + hfs_unlock (cp); + + error = vnode_create_empty (&empty_rvp); + + hfs_lock_always (cp, HFS_EXCLUSIVE_LOCK); + + if (error) { + /* If acquiring the 'empty' vnode failed, then nothing to clean up */ + return error; + } + + /* + * We could have raced with another thread here while we dropped our cnode + * lock. See if the cnode now has a resource fork vnode and restart if appropriate. + * + * Note: We just released the cnode lock, so there is a possibility that the + * cnode that we just acquired has been deleted or even removed from disk + * completely, though this is unlikely. If the file is open-unlinked, the + * check below will resolve it for us. If it has been completely + * removed (even from the catalog!), then when we examine the catalog + * directly, below, while holding the catalog lock, we will not find the + * item and we can fail out properly. + */ + if (cp->c_rsrc_vp) { + /* Drop the empty vnode before restarting */ + vnode_put (empty_rvp); + empty_rvp = NULL; + rvp = NULL; + goto restart; + } + + /* + * hfs_vgetsrc may be invoked for a cnode that has already been marked + * C_DELETED. This is because we need to continue to provide rsrc + * fork access to open-unlinked files. In this case, build a fake descriptor + * like in hfs_removefile. If we don't do this, buildkey will fail in + * cat_lookup because this cnode has no name in its descriptor. + */ + if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) { + bzero (&to_desc, sizeof(to_desc)); + bzero (delname, 32); + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); + to_desc.cd_nameptr = (const u_int8_t*) delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_desc.cd_flags = 0; + to_desc.cd_cnid = cp->c_cnid; + + descptr = &to_desc; + } + else { + descptr = &cp->c_desc; + } + + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* + * We call cat_idlookup (instead of cat_lookup) below because we can't + * trust the descriptor in the provided cnode for lookups at this point. + * Between the time of the original lookup of this vnode and now, the + * descriptor could have gotten swapped or replaced. If this occurred, + * the parent/name combo originally desired may not necessarily be provided + * if we use the descriptor. Even worse, if the vnode represents + * a hardlink, we could have removed one of the links from the namespace + * but left the descriptor alone, since hfs_unlink does not invalidate + * the descriptor in the cnode if other links still point to the inode. + * + * Consider the following (slightly contrived) scenario: + * /tmp/a <--> /tmp/b (hardlinks). + * 1. Thread A: open rsrc fork on /tmp/b. + * 1a. Thread A: does lookup, goes out to lunch right before calling getnamedstream. + * 2. Thread B does 'mv /foo/b /tmp/b' + * 2. Thread B succeeds. + * 3. Thread A comes back and wants rsrc fork info for /tmp/b. + * + * Even though the hardlink backing /tmp/b is now eliminated, the descriptor + * is not removed/updated during the unlink process. So, if you were to + * do a lookup on /tmp/b, you'd acquire an entirely different record's resource + * fork. + * + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). + * + * Addendum: We can't do the above for HFS standard since we aren't guaranteed to + * have thread records for files. They were only required for directories. 
So + * we need to do the lookup with the catalog name. This is OK since hardlinks were + * never allowed on HFS standard. + */ + + /* Get resource fork data */ +#if CONFIG_HFS_STD + if (ISSET(hfsmp->hfs_flags, HFS_STANDARD)) { + /* + * HFS standard only: + * + * Get the resource fork for this item with a cat_lookup call, but do not + * force a case lookup since HFS standard is case-insensitive only. We + * don't want the descriptor; just the fork data here. If we tried to + * do a ID lookup (via thread record -> catalog record), then we might fail + * prematurely since, as noted above, thread records were not strictly required + * on files in HFS. + */ + error = cat_lookup (hfsmp, descptr, 1, 0, (struct cat_desc*)NULL, + (struct cat_attr*)NULL, &rsrcfork, NULL); + } else +#endif + { + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + /* Drop our 'empty' vnode ! */ + vnode_put (empty_rvp); + return (error); + } + /* + * Supply hfs_getnewvnode with a component name. + */ + cn.cn_pnbuf = NULL; + if (descptr->cd_nameptr) { + void *buf = hfs_malloc(MAXPATHLEN); + + cn = (struct componentname){ + .cn_nameiop = LOOKUP, + .cn_flags = ISLASTCN, + .cn_pnlen = MAXPATHLEN, + .cn_pnbuf = buf, + .cn_nameptr = buf, + .cn_namelen = snprintf(buf, MAXPATHLEN, + "%s%s", descptr->cd_nameptr, + _PATH_RSRCFORKSPEC) + }; + + // Should never happen because cn.cn_nameptr won't ever be long... + if (cn.cn_namelen >= MAXPATHLEN) { + hfs_free(buf, MAXPATHLEN); + /* Drop our 'empty' vnode ! */ + vnode_put (empty_rvp); + return ENAMETOOLONG; + + } + } + dvp = vnode_getparent(vp); + + /* + * We are about to call hfs_getnewvnode and pass in the vnode that we acquired + * earlier when we were not holding any locks. The semantics of GNV_USE_VP require that + * either hfs_getnewvnode consume the vnode and vend it back to us, properly initialized, + * or it will consume/dispose of it properly if it errors out. + */ + rvp = empty_rvp; + + error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, + descptr, (GNV_WANTRSRC | GNV_SKIPLOCK | GNV_USE_VP), + &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags); + + if (dvp) + vnode_put(dvp); + hfs_free(cn.cn_pnbuf, MAXPATHLEN); + if (error) + return (error); + } /* End 'else' for rsrc fork not existing */ + + *rvpp = rvp; + return (0); +} + +/* + * Wrapper for special device reads + */ +int +hfsspec_read(struct vnop_read_args *ap) +{ + /* + * Set access flag. + */ + cnode_t *cp = VTOC(ap->a_vp); + + if (cp) + cp->c_touch_acctime = TRUE; + + return spec_read(ap); +} + +/* + * Wrapper for special device writes + */ +int +hfsspec_write(struct vnop_write_args *ap) +{ + /* + * Set update and change flags. + */ + cnode_t *cp = VTOC(ap->a_vp); + + if (cp) { + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + } + + return spec_write(ap); +} + +/* + * Wrapper for special device close + * + * Update the times on the cnode then do device close. + */ +int +hfsspec_close(struct vnop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + cnode_t *cp = VTOC(vp); + + if (cp && vnode_isinuse(ap->a_vp, 0)) { + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { + hfs_touchtimes(VTOHFS(vp), cp); + hfs_unlock(cp); + } + } + return spec_close(ap); +} + +#if FIFO +/* + * Wrapper for fifo reads + */ +static int +hfsfifo_read(struct vnop_read_args *ap) +{ + /* + * Set access flag. 
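+ * Only the atime touch flag on the cnode is recorded here; the
+ * read itself is handed off to fifo_read() below.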
+ */ + VTOC(ap->a_vp)->c_touch_acctime = TRUE; + return fifo_read(ap); +} + +/* + * Wrapper for fifo writes + */ +static int +hfsfifo_write(struct vnop_write_args *ap) +{ + /* + * Set update and change flags. + */ + VTOC(ap->a_vp)->c_touch_chgtime = TRUE; + VTOC(ap->a_vp)->c_touch_modtime = TRUE; + return fifo_write(ap); +} + +/* + * Wrapper for fifo close + * + * Update the times on the cnode then do device close. + */ +static int +hfsfifo_close(struct vnop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + + if (vnode_isinuse(ap->a_vp, 1)) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { + cp = VTOC(vp); + hfs_touchtimes(VTOHFS(vp), cp); + hfs_unlock(cp); + } + } + return fifo_close(ap); +} + + +#endif /* FIFO */ + +/* + * Getter for the document_id + * the document_id is stored in FndrExtendedFileInfo/FndrExtendedDirInfo + */ +static u_int32_t +hfs_get_document_id_internal(const uint8_t *finderinfo, mode_t mode) +{ + const uint8_t *finfo = NULL; + u_int32_t doc_id = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = finderinfo + 16; + + if (S_ISDIR(mode) || S_ISREG(mode)) { + const struct FndrExtendedFileInfo *extinfo = (const struct FndrExtendedFileInfo *)finfo; + doc_id = extinfo->document_id; + } + + return doc_id; +} + + +/* getter(s) for document id */ +u_int32_t +hfs_get_document_id(struct cnode *cp) +{ + return (hfs_get_document_id_internal((u_int8_t*)cp->c_finderinfo, + cp->c_attr.ca_mode)); +} + +/* If you have finderinfo and mode, you can use this */ +u_int32_t +hfs_get_document_id_from_blob(const uint8_t *finderinfo, mode_t mode) +{ + return (hfs_get_document_id_internal(finderinfo, mode)); +} + +/* + * Synchronize a file's in-core state with that on disk. + */ +int +hfs_vnop_fsync(struct vnop_fsync_args *ap) +{ + struct vnode* vp = ap->a_vp; + int error; + + /* Note: We check hfs flags instead of vfs mount flag because during + * read-write update, hfs marks itself read-write much earlier than + * the vfs, and hence won't result in skipping of certain writes like + * zero'ing out of unused nodes, creation of hotfiles btree, etc. + */ + if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) { + return 0; + } + + /* + * No need to call cp_handle_vnop to resolve fsync(). Any dirty data + * should have caused the keys to be unwrapped at the time the data was + * put into the UBC, either at mmap/pagein/read-write. If we did manage + * to let this by, then strategy will auto-resolve for us. + * + * We also need to allow ENOENT lock errors since unlink + * system call can call VNOP_FSYNC during vclean. + */ + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return (0); + + error = hfs_fsync(vp, ap->a_waitfor, 0, vfs_context_proc(ap->a_context)); + + hfs_unlock(VTOC(vp)); + return (error); +} + +int (**hfs_vnodeop_p)(void *); + +#define VOPFUNC int (*)(void *) + + +#if CONFIG_HFS_STD +int (**hfs_std_vnodeop_p) (void *); +static int hfs_readonly_op (__unused void* ap) { return (EROFS); } + +/* + * In 10.6 and forward, HFS Standard is read-only and deprecated. 
The vnop table below + * is for use with HFS standard to block out operations that would modify the file system + */ + +const struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)hfs_readonly_op }, /* create (READONLY) */ + { &vnop_mknod_desc, (VOPFUNC)hfs_readonly_op }, /* mknod (READONLY) */ + { &vnop_open_desc, (VOPFUNC)hfs_vnop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfs_vnop_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_readonly_op }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfs_vnop_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfs_readonly_op }, /* write (READONLY) */ + { &vnop_ioctl_desc, (VOPFUNC)hfs_vnop_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_exchange_desc, (VOPFUNC)hfs_readonly_op }, /* exchange (READONLY)*/ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_readonly_op}, /* fsync (READONLY) */ + { &vnop_remove_desc, (VOPFUNC)hfs_readonly_op }, /* remove (READONLY) */ + { &vnop_link_desc, (VOPFUNC)hfs_readonly_op }, /* link ( READONLLY) */ + { &vnop_rename_desc, (VOPFUNC)hfs_readonly_op }, /* rename (READONLY)*/ + { &vnop_mkdir_desc, (VOPFUNC)hfs_readonly_op }, /* mkdir (READONLY) */ + { &vnop_rmdir_desc, (VOPFUNC)hfs_readonly_op }, /* rmdir (READONLY) */ + { &vnop_symlink_desc, (VOPFUNC)hfs_readonly_op }, /* symlink (READONLY) */ + { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir }, /* readdir */ + { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */ + { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_allocate_desc, (VOPFUNC)hfs_readonly_op }, /* allocate (READONLY) */ +#if CONFIG_SEARCHFS + { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif + { &vnop_bwrite_desc, (VOPFUNC)hfs_readonly_op }, /* bwrite (READONLY) */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ + { &vnop_pageout_desc,(VOPFUNC) hfs_readonly_op }, /* pageout (READONLY) */ + { &vnop_copyfile_desc, (VOPFUNC)hfs_readonly_op }, /* copyfile (READONLY)*/ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_readonly_op}, /* set xattr (READONLY) */ + { &vnop_removexattr_desc, (VOPFUNC)hfs_readonly_op}, /* remove xattr (READONLY) */ + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, +#if NAMEDSTREAMS + { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, + { &vnop_makenamedstream_desc, (VOPFUNC)hfs_readonly_op }, + { &vnop_removenamedstream_desc, (VOPFUNC)hfs_readonly_op }, +#endif + { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk 
*/ + { NULL, (VOPFUNC)NULL } +}; + +const struct vnodeopv_desc hfs_std_vnodeop_opv_desc = +{ &hfs_std_vnodeop_p, hfs_standard_vnodeop_entries }; +#endif + +/* VNOP table for HFS+ */ +const struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)hfs_vnop_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)hfs_vnop_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)hfs_vnop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfs_vnop_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfs_vnop_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfs_vnop_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)hfs_vnop_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange }, /* exchange */ + { &vnop_mmap_desc, (VOPFUNC)hfs_vnop_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)hfs_vnop_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)hfs_vnop_rename }, /* rename */ + { &vnop_renamex_desc, (VOPFUNC)hfs_vnop_renamex }, /* renamex (with flags) */ + { &vnop_mkdir_desc, (VOPFUNC)hfs_vnop_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)hfs_vnop_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)hfs_vnop_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir }, /* readdir */ + { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */ + { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate }, /* allocate */ +#if CONFIG_SEARCHFS + { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ +#else + { &vnop_searchfs_desc, (VOPFUNC)err_searchfs }, /* search fs */ +#endif + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ + { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout }, /* pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, +#if NAMEDSTREAMS + { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, + { &vnop_makenamedstream_desc, (VOPFUNC)hfs_vnop_makenamedstream }, + { &vnop_removenamedstream_desc, (VOPFUNC)hfs_vnop_removenamedstream }, +#endif + { &vnop_getattrlistbulk_desc, (VOPFUNC)hfs_vnop_getattrlistbulk }, /* getattrlistbulk */ + { &vnop_mnomap_desc, (VOPFUNC)hfs_vnop_mnomap }, + { 
NULL, (VOPFUNC)NULL } +}; + +const struct vnodeopv_desc hfs_vnodeop_opv_desc = +{ &hfs_vnodeop_p, hfs_vnodeop_entries }; + + +/* Spec Op vnop table for HFS+ */ +int (**hfs_specop_p)(void *); +const struct vnodeopv_entry_desc hfs_specop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfsspec_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfsspec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfsspec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, + { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } +}; +const struct vnodeopv_desc hfs_specop_opv_desc = + { &hfs_specop_p, hfs_specop_entries }; + +#if FIFO +/* HFS+ FIFO VNOP table */ +int (**hfs_fifoop_p)(void *); +const struct vnodeopv_entry_desc hfs_fifoop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfsfifo_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfsfifo_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfsfifo_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ + { 
&vnop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)fifo_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, + { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } +}; +const struct vnodeopv_desc hfs_fifoop_opv_desc = + { &hfs_fifoop_p, hfs_fifoop_entries }; +#endif /* FIFO */ diff --git a/core/hfs_xattr.c b/core/hfs_xattr.c new file mode 100644 index 0000000..0d3dcd3 --- /dev/null +++ b/core/hfs_xattr.c @@ -0,0 +1,2633 @@ +/* + * Copyright (c) 2004-2017 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfs.h" +#include "hfs_cnode.h" +#include "hfs_mount.h" +#include "hfs_format.h" +#include "hfs_endian.h" +#include "hfs_btreeio.h" +#include "hfs_fsctl.h" +#include "hfs_cprotect.h" + +#include "BTreesInternal.h" + +#define HFS_XATTR_VERBOSE 0 + +#define ATTRIBUTE_FILE_NODE_SIZE 8192 + + +/* State information for the listattr_callback callback function. */ +struct listattr_callback_state { + u_int32_t fileID; + int result; + uio_t uio; + size_t size; +#if HFS_COMPRESSION + int showcompressed; + vfs_context_t ctx; + vnode_t vp; +#endif /* HFS_COMPRESSION */ +}; + + +/* HFS Internal Names */ +#define XATTR_XATTREXTENTS_NAME "system.xattrextents" + +static u_int32_t emptyfinfo[8] = {0}; + +static int hfs_zero_hidden_fields (struct cnode *cp, u_int8_t *finderinfo); + +const char hfs_attrdatafilename[] = "Attribute Data"; + +static int listattr_callback(const HFSPlusAttrKey *key, const HFSPlusAttrData *data, + struct listattr_callback_state *state); + +static int remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator); + +static int getnodecount(struct hfsmount *hfsmp, size_t nodesize); + +static size_t getmaxinlineattrsize(struct vnode * attrvp); + +static int read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents); + +static int write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents); + +static int alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, HFSPlusExtentDescriptor *extents, int *blocks); + +static void free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *extents); + +static int has_overflow_extents(HFSPlusForkData *forkdata); + +static int count_extent_blocks(int maxblks, HFSPlusExtentRecord extents); + +#if NAMEDSTREAMS +/* + * Obtain the vnode for a stream. + */ +int +hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap) +{ + vnode_t vp = ap->a_vp; + vnode_t *svpp = ap->a_svpp; + struct cnode *cp; + int error = 0; + + *svpp = NULL; + + /* + * We only support the "com.apple.ResourceFork" stream. + */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) != 0) { + return (ENOATTR); + } + cp = VTOC(vp); + if ( !S_ISREG(cp->c_mode) ) { + return (EPERM); + } +#if HFS_COMPRESSION + int hide_rsrc = hfs_hides_rsrc(ap->a_context, VTOC(vp), 1); +#endif /* HFS_COMPRESSION */ + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (error); + } + if ((!hfs_has_rsrc(cp) +#if HFS_COMPRESSION + || hide_rsrc +#endif /* HFS_COMPRESSION */ + ) && (ap->a_operation != NS_OPEN)) { + hfs_unlock(cp); + return (ENOATTR); + } + error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp); + hfs_unlock(cp); + + return (error); +} + +/* + * Create a stream. + */ +int +hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap) +{ + vnode_t vp = ap->a_vp; + vnode_t *svpp = ap->a_svpp; + struct cnode *cp; + int error = 0; + + *svpp = NULL; + + /* + * We only support the "com.apple.ResourceFork" stream. 
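+ * Any other stream name is rejected with ENOATTR below; the resource
+ * fork vnode itself is obtained through hfs_vgetrsrc().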
+ */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) != 0) { + return (ENOATTR); + } + cp = VTOC(vp); + if ( !S_ISREG(cp->c_mode) ) { + return (EPERM); + } +#if HFS_COMPRESSION + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { + if (VNODE_IS_RSRC(vp)) { + return EINVAL; + } else { + error = decmpfs_decompress_file(vp, VTOCMP(vp), -1, 1, 0); + if (error != 0) + return error; + } + } +#endif /* HFS_COMPRESSION */ + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (error); + } + error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp); + hfs_unlock(cp); + + return (error); +} + +/* + * Remove a stream. + */ +int +hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) +{ + vnode_t svp = ap->a_svp; + cnode_t *scp = VTOC(svp); + int error = 0; + + /* + * We only support the "com.apple.ResourceFork" stream. + */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) != 0) { + return (ENOATTR); + } +#if HFS_COMPRESSION + if (hfs_hides_rsrc(ap->a_context, scp, 1)) { + /* do nothing */ + return 0; + } +#endif /* HFS_COMPRESSION */ + + hfs_lock_truncate(scp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (VTOF(svp)->ff_size) { + // hfs_truncate will deal with the cnode lock + error = hfs_truncate(svp, 0, IO_NDELAY, 0, ap->a_context); + } + hfs_unlock_truncate(scp, HFS_LOCK_DEFAULT); + + return error; +} +#endif + + +/* Zero out the date added field for the specified cnode */ +static int hfs_zero_hidden_fields (struct cnode *cp, u_int8_t *finderinfo) +{ + u_int8_t *finfo = finderinfo; + + /* Advance finfo by 16 bytes to the 2nd half of the finderinfo */ + finfo = finfo + 16; + + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->document_id = 0; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } else { + /* Return an error */ + return -1; + } + return 0; + +} + +/* + * Retrieve the data of an extended attribute. + */ +int +hfs_vnop_getxattr(struct vnop_getxattr_args *ap) +/* + struct vnop_getxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + char * a_name; + uio_t a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; + }; +*/ +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct hfsmount *hfsmp; + uio_t uio = ap->a_uio; + size_t bufsize; + int result; + + cp = VTOC(vp); + if (vp == cp->c_vp) { +#if HFS_COMPRESSION + int decmpfs_hide = hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1); /* 1 == don't take the cnode lock */ + if (decmpfs_hide && !(ap->a_options & XATTR_SHOWCOMPRESSION)) + return ENOATTR; +#endif /* HFS_COMPRESSION */ + + /* Get the Finder Info. */ + if (strcmp(ap->a_name, XATTR_FINDERINFO_NAME) == 0) { + u_int8_t finderinfo[32]; + bufsize = 32; + + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + /* Make a copy since we may not export all of it. */ + bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo)); + hfs_unlock(cp); + + /* Zero out the date added field in the local copy */ + hfs_zero_hidden_fields (cp, finderinfo); + + /* Don't expose a symlink's private type/creator. 
*/ + if (vnode_islnk(vp)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)&finderinfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + /* If Finder Info is empty then it doesn't exist. */ + if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) { + return (ENOATTR); + } + if (uio == NULL) { + *ap->a_size = bufsize; + return (0); + } + if ((user_size_t)uio_resid(uio) < bufsize) + return (ERANGE); + + result = uiomove((caddr_t)&finderinfo , bufsize, uio); + + return (result); + } + /* Read the Resource Fork. */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) == 0) { + struct vnode *rvp = NULL; + int openunlinked = 0; + int namelen = 0; + + if ( !S_ISREG(cp->c_mode) ) { + return (EPERM); + } + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + namelen = cp->c_desc.cd_namelen; + + if (!hfs_has_rsrc(cp)) { + hfs_unlock(cp); + return (ENOATTR); + } + hfsmp = VTOHFS(vp); + if ((cp->c_flag & C_DELETED) && (namelen == 0)) { + openunlinked = 1; + } + + result = hfs_vgetrsrc(hfsmp, vp, &rvp); + hfs_unlock(cp); + if (result) { + return (result); + } + if (uio == NULL) { + *ap->a_size = (size_t)VTOF(rvp)->ff_size; + } else { +#if HFS_COMPRESSION + user_ssize_t uio_size = 0; + if (decmpfs_hide) + uio_size = uio_resid(uio); +#endif /* HFS_COMPRESSION */ + result = VNOP_READ(rvp, uio, 0, ap->a_context); +#if HFS_COMPRESSION + if (decmpfs_hide && + (result == 0) && + (uio_resid(uio) == uio_size)) { + /* + * We intentionally make the above call to VNOP_READ so that + * it can return an authorization/permission/etc. Error + * based on ap->a_context and thus deny this operation; + * in that case, result != 0 and we won't proceed. + * + * However, if result == 0, it will have returned no data + * because hfs_vnop_read hid the resource fork + * (hence uio_resid(uio) == uio_size, i.e. the uio is untouched) + * + * In that case, we try again with the decmpfs_ctx context + * to get the actual data + */ + result = VNOP_READ(rvp, uio, 0, decmpfs_ctx); + } +#endif /* HFS_COMPRESSION */ + } + /* force the rsrc fork vnode to recycle right away */ + if (openunlinked) { + int vref; + vref = vnode_ref (rvp); + if (vref == 0) { + vnode_rele (rvp); + } + vnode_recycle(rvp); + } + vnode_put(rvp); + return (result); + } + } + hfsmp = VTOHFS(vp); +#if CONFIG_HFS_STD + /* + * Standard HFS only supports native FinderInfo and Resource Forks. + */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (EPERM); + } +#endif + + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + + /* Check for non-rsrc, non-finderinfo EAs */ + result = hfs_getxattr_internal (cp, ap, VTOHFS(cp->c_vp), 0); + + hfs_unlock(cp); + + return MacToVFSError(result); +} + +// Has same limitations as hfs_getxattr_internal below +int hfs_xattr_read(vnode_t vp, const char *name, void *data, size_t *size) +{ + uio_t uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + + uio_addiov(uio, CAST_USER_ADDR_T(data), *size); + + struct vnop_getxattr_args args = { + .a_uio = uio, + .a_name = name, + .a_size = size + }; + + int ret = hfs_getxattr_internal(VTOC(vp), &args, VTOHFS(vp), 0); + + uio_free(uio); + + return ret; +} + +/* + * getxattr_internal + * + * We break out this internal function which searches the attributes B-Tree and the + * overflow extents file to find non-resource, non-finderinfo EAs. 
There may be cases + * where we need to get EAs in contexts where we are already holding the cnode lock, + * and to re-enter hfs_vnop_getxattr would cause us to double-lock the cnode. Instead, + * we can just directly call this function. + * + * We pass the hfsmp argument directly here because we may not necessarily have a cnode to + * operate on. Under normal conditions, we have a file or directory to query, but if we + * are operating on the root directory (id 1), then we may not have a cnode. In this case, if hte + * 'cp' argument is NULL, then we need to use the 'fileid' argument as the entry to manipulate + * + * NOTE: This function assumes the cnode lock for 'cp' is held exclusive or shared. + */ +int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, + struct hfsmount *hfsmp, u_int32_t fileid) +{ + + struct filefork *btfile; + struct BTreeIterator * iterator = NULL; + size_t bufsize = 0; + HFSPlusAttrRecord *recp = NULL; + size_t recp_size = 0; + FSBufferDescriptor btdata; + int lockflags = 0; + int result = 0; + u_int16_t datasize = 0; + uio_t uio = ap->a_uio; + u_int32_t target_id = 0; + + if (cp) { + target_id = cp->c_fileid; + } else { + target_id = fileid; + } + + + /* Bail if we don't have an EA B-Tree. */ + if ((hfsmp->hfs_attribute_vp == NULL) || + ((cp) && (cp->c_attr.ca_recflags & kHFSHasAttributesMask) == 0)) { + result = ENOATTR; + goto exit; + } + + /* Initialize the B-Tree iterator for searching for the proper EA */ + btfile = VTOF(hfsmp->hfs_attribute_vp); + + iterator = hfs_mallocz(sizeof(*iterator)); + + /* Allocate memory for reading in the attribute record. This buffer is + * big enough to read in all types of attribute records. It is not big + * enough to read inline attribute data which is read in later. + */ + recp = hfs_malloc(recp_size = sizeof(HFSPlusAttrRecord)); + btdata.bufferAddress = recp; + btdata.itemSize = sizeof(HFSPlusAttrRecord); + btdata.itemCount = 1; + + result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto exit; + } + + /* Lookup the attribute in the Attribute B-Tree */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (result) { + if (result == btNotFound) { + result = ENOATTR; + } + goto exit; + } + + /* + * Operate differently if we have inline EAs that can fit in the attribute B-Tree or if + * we have extent based EAs. + */ + switch (recp->recordType) { + + /* Attribute fits in the Attribute B-Tree */ + case kHFSPlusAttrInlineData: { + /* + * Sanity check record size. It's not required to have any + * user data, so the minimum size is 2 bytes less that the + * size of HFSPlusAttrData (since HFSPlusAttrData struct + * has 2 bytes set aside for attribute data). + */ + if (datasize < (sizeof(HFSPlusAttrData) - 2)) { + printf("hfs_getxattr: vol=%s %d,%s invalid record size %d (expecting %lu)\n", + hfsmp->vcbVN, target_id, ap->a_name, datasize, sizeof(HFSPlusAttrData)); + result = ENOATTR; + break; + } + *ap->a_size = recp->attrData.attrSize; + if (uio && recp->attrData.attrSize != 0) { + if (*ap->a_size > (user_size_t)uio_resid(uio)) { + /* User provided buffer is not large enough for the xattr data */ + result = ERANGE; + } else { + /* Previous BTreeSearchRecord() read in only the attribute record, + * and not the attribute data. 
Now allocate enough memory for + * both attribute record and data, and read the attribute record again. + */ + bufsize = sizeof(HFSPlusAttrData) - 2 + recp->attrData.attrSize; + hfs_free(recp, recp_size); + recp = hfs_malloc(recp_size = bufsize); + + btdata.bufferAddress = recp; + btdata.itemSize = bufsize; + btdata.itemCount = 1; + + bzero(iterator, sizeof(*iterator)); + result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto exit; + } + + /* Lookup the attribute record and inline data */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (result) { + if (result == btNotFound) { + result = ENOATTR; + } + goto exit; + } + + /* Copy-out the attribute data to the user buffer */ + *ap->a_size = recp->attrData.attrSize; + result = uiomove((caddr_t) &recp->attrData.attrData , recp->attrData.attrSize, uio); + } + } + break; + } + + /* Extent-Based EAs */ + case kHFSPlusAttrForkData: { + if (datasize < sizeof(HFSPlusAttrForkData)) { + printf("hfs_getxattr: vol=%s %d,%s invalid record size %d (expecting %lu)\n", + hfsmp->vcbVN, target_id, ap->a_name, datasize, sizeof(HFSPlusAttrForkData)); + result = ENOATTR; + break; + } + *ap->a_size = recp->forkData.theFork.logicalSize; + if (uio == NULL) { + break; + } + if (*ap->a_size > (user_size_t)uio_resid(uio)) { + result = ERANGE; + break; + } + /* Process overflow extents if necessary. */ + if (has_overflow_extents(&recp->forkData.theFork)) { + HFSPlusExtentDescriptor *extentbuf; + HFSPlusExtentDescriptor *extentptr; + size_t extentbufsize; + u_int32_t totalblocks; + u_int32_t blkcnt; + u_int32_t attrlen; + + totalblocks = recp->forkData.theFork.totalBlocks; + /* Ignore bogus block counts. */ + if (totalblocks > howmany(HFS_XATTR_MAXSIZE, hfsmp->blockSize)) { + result = ERANGE; + break; + } + attrlen = recp->forkData.theFork.logicalSize; + + /* Get a buffer to hold the worst case amount of extents. */ + extentbufsize = totalblocks * sizeof(HFSPlusExtentDescriptor); + extentbufsize = roundup(extentbufsize, sizeof(HFSPlusExtentRecord)); + extentbuf = hfs_mallocz(extentbufsize); + extentptr = extentbuf; + + /* Grab the first 8 extents. */ + bcopy(&recp->forkData.theFork.extents[0], extentptr, sizeof(HFSPlusExtentRecord)); + extentptr += kHFSPlusExtentDensity; + blkcnt = count_extent_blocks(totalblocks, recp->forkData.theFork.extents); + + /* Now lookup the overflow extents. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + while (blkcnt < totalblocks) { + ((HFSPlusAttrKey *)&iterator->key)->startBlock = blkcnt; + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + if (result || + (recp->recordType != kHFSPlusAttrExtents) || + (datasize < sizeof(HFSPlusAttrExtents))) { + printf("hfs_getxattr: %s missing extents, only %d blks of %d found\n", + ap->a_name, blkcnt, totalblocks); + result = ENOATTR; + break; /* break from while */ + } + /* Grab the next 8 extents. 
*/ + bcopy(&recp->overflowExtents.extents[0], extentptr, sizeof(HFSPlusExtentRecord)); + extentptr += kHFSPlusExtentDensity; + blkcnt += count_extent_blocks(totalblocks, recp->overflowExtents.extents); + } + + /* Release Attr B-Tree lock */ + hfs_systemfile_unlock(hfsmp, lockflags); + + if (blkcnt < totalblocks) { + result = ENOATTR; + } else { + result = read_attr_data(hfsmp, uio, attrlen, extentbuf); + } + hfs_free(extentbuf, extentbufsize); + + } else { /* No overflow extents. */ + result = read_attr_data(hfsmp, uio, recp->forkData.theFork.logicalSize, recp->forkData.theFork.extents); + } + break; + } + + default: + /* We only support Extent or inline EAs. Default to ENOATTR for anything else */ + result = ENOATTR; + break; + } + +exit: + hfs_free(iterator, sizeof(*iterator)); + hfs_free(recp, recp_size); + + return result; + +} + + +/* + * Set the data of an extended attribute. + */ +int +hfs_vnop_setxattr(struct vnop_setxattr_args *ap) +/* + struct vnop_setxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + char * a_name; + uio_t a_uio; + int a_options; + vfs_context_t a_context; + }; +*/ +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp = NULL; + struct hfsmount *hfsmp; + uio_t uio = ap->a_uio; + size_t attrsize; + void * user_data_ptr = NULL; + int result; + time_t orig_ctime=VTOC(vp)->c_ctime; + + if (ap->a_name == NULL || ap->a_name[0] == '\0') { + return (EINVAL); /* invalid name */ + } + hfsmp = VTOHFS(vp); + if (VNODE_IS_RSRC(vp)) { + return (EPERM); + } + +#if HFS_COMPRESSION + if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) ) { /* 1 == don't take the cnode lock */ + result = decmpfs_decompress_file(vp, VTOCMP(vp), -1, 1, 0); + if (result != 0) + return result; + } +#endif /* HFS_COMPRESSION */ + + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_WRITE_OP, NSPACE_REARM_NO_ARG); + + /* Set the Finder Info. */ + if (strcmp(ap->a_name, XATTR_FINDERINFO_NAME) == 0) { + union { + uint8_t data[32]; + char cdata[32]; + struct FndrFileInfo info; + } fi; + void * finderinfo_start; + u_int8_t *finfo = NULL; + u_int16_t fdFlags; + u_int32_t dateadded = 0; + u_int32_t write_gen_counter = 0; + u_int32_t document_id = 0; + + attrsize = sizeof(VTOC(vp)->c_finderinfo); + + if ((user_size_t)uio_resid(uio) != attrsize) { + return (ERANGE); + } + /* Grab the new Finder Info data. */ + if ((result = uiomove(fi.cdata, attrsize, uio))) { + return (result); + } + + if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + cp = VTOC(vp); + + /* Symlink's don't have an external type/creator. */ + if (vnode_islnk(vp)) { + /* Skip over type/creator fields. */ + finderinfo_start = &cp->c_finderinfo[8]; + attrsize -= 8; + } else { + finderinfo_start = &cp->c_finderinfo[0]; + /* + * Don't allow the external setting of + * file type to kHardLinkFileType. + */ + if (fi.info.fdType == SWAP_BE32(kHardLinkFileType)) { + hfs_unlock(cp); + return (EPERM); + } + } + + /* Grab the current date added from the cnode */ + dateadded = hfs_get_dateadded (cp); + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); + /* + * Grab generation counter directly from the cnode + * instead of calling hfs_get_gencount(), because + * for zero generation count values hfs_get_gencount() + * lies and bumps it up to one. 
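+ * The raw value (along with the document id) is saved here so that it
+ * round-trips unchanged when the new Finder Info is copied back into
+ * the cnode below.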
+ */ + write_gen_counter = extinfo->write_gen_counter; + document_id = extinfo->document_id; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((u_int8_t*)cp->c_finderinfo + 16); + write_gen_counter = extinfo->write_gen_counter; + document_id = extinfo->document_id; + } + + /* + * Zero out the finder info's reserved fields like date added, + * generation counter, and document id to ignore user's attempts + * to set it + */ + hfs_zero_hidden_fields(cp, fi.data); + + if (bcmp(finderinfo_start, emptyfinfo, attrsize)) { + /* attr exists and "create" was specified. */ + if (ap->a_options & XATTR_CREATE) { + hfs_unlock(cp); + return (EEXIST); + } + } else { /* empty */ + /* attr doesn't exists and "replace" was specified. */ + if (ap->a_options & XATTR_REPLACE) { + hfs_unlock(cp); + return (ENOATTR); + } + } + + /* + * Now restore the date added and other reserved fields to the finderinfo to + * be written out. Advance to the 2nd half of the finderinfo to write them + * out into the buffer. + * + * Make sure to endian swap the date added back into big endian. When we used + * hfs_get_dateadded above to retrieve it, it swapped into local endianness + * for us. But now that we're writing it out, put it back into big endian. + */ + finfo = &fi.data[16]; + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + extinfo->write_gen_counter = write_gen_counter; + extinfo->document_id = document_id; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->date_added = OSSwapHostToBigInt32(dateadded); + extinfo->write_gen_counter = write_gen_counter; + extinfo->document_id = document_id; + } + + /* Set the cnode's Finder Info. */ + if (attrsize == sizeof(cp->c_finderinfo)) { + bcopy(&fi.data[0], finderinfo_start, attrsize); + } else { + bcopy(&fi.data[8], finderinfo_start, attrsize); + } + + /* Updating finderInfo updates change time and modified time */ + cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_MODIFIED; + + /* + * Mirror the invisible bit to the UF_HIDDEN flag. + * + * The fdFlags for files and frFlags for folders are both 8 bytes + * into the userInfo (the first 16 bytes of the Finder Info). They + * are both 16-bit fields. + */ + fdFlags = *((u_int16_t *) &cp->c_finderinfo[8]); + if (fdFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask)) { + cp->c_bsdflags |= UF_HIDDEN; + } else { + cp->c_bsdflags &= ~UF_HIDDEN; + } + + result = hfs_update(vp, 0); + + hfs_unlock(cp); + return (result); + } + /* Write the Resource Fork. */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) == 0) { + struct vnode *rvp = NULL; + int namelen = 0; + int openunlinked = 0; + + if (!vnode_isreg(vp)) { + return (EPERM); + } + if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + cp = VTOC(vp); + namelen = cp->c_desc.cd_namelen; + + if (hfs_has_rsrc(cp)) { + /* attr exists and "create" was specified. */ + if (ap->a_options & XATTR_CREATE) { + hfs_unlock(cp); + return (EEXIST); + } + } else { + /* attr doesn't exists and "replace" was specified. */ + if (ap->a_options & XATTR_REPLACE) { + hfs_unlock(cp); + return (ENOATTR); + } + } + + /* + * Note that we could be called on to grab the rsrc fork vnode + * for a file that has become open-unlinked. 
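+ * That case is detected just below via C_DELETED plus an empty
+ * descriptor name, and the resource fork vnode is forced to recycle
+ * once the write completes.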
+ */ + if ((cp->c_flag & C_DELETED) && (namelen == 0)) { + openunlinked = 1; + } + + result = hfs_vgetrsrc(hfsmp, vp, &rvp); + hfs_unlock(cp); + if (result) { + return (result); + } + /* VNOP_WRITE marks cnode as needing a modtime update */ + result = VNOP_WRITE(rvp, uio, 0, ap->a_context); + + /* if open unlinked, force it inactive */ + if (openunlinked) { + int vref; + vref = vnode_ref (rvp); + if (vref == 0) { + vnode_rele(rvp); + } + vnode_recycle (rvp); + } else { + /* cnode is not open-unlinked, so re-lock cnode to sync */ + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + vnode_recycle (rvp); + vnode_put(rvp); + return result; + } + + /* hfs fsync rsrc fork to force to disk and update modtime */ + result = hfs_fsync (rvp, MNT_NOWAIT, 0, vfs_context_proc (ap->a_context)); + hfs_unlock (cp); + } + + vnode_put(rvp); + return (result); + } +#if CONFIG_HFS_STD + /* + * Standard HFS only supports native FinderInfo and Resource Forks. + */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (EPERM); + } +#endif + attrsize = uio_resid(uio); + + /* Enforce an upper limit. */ + if (attrsize > HFS_XATTR_MAXSIZE) { + result = E2BIG; + goto exit; + } + + /* + * Attempt to copy the users attr data before taking any locks, + * only if it will be an inline attribute. For larger attributes, + * the data will be directly read from the uio. + */ + if (attrsize > 0 && + hfsmp->hfs_max_inline_attrsize != 0 && + attrsize < hfsmp->hfs_max_inline_attrsize) { + user_data_ptr = hfs_malloc(attrsize); + + result = uiomove((caddr_t)user_data_ptr, attrsize, uio); + if (result) { + goto exit; + } + } + + result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (result) { + goto exit; + } + cp = VTOC(vp); + + /* + * If we're trying to set a non-finderinfo, non-resourcefork EA, then + * call the breakout function. + */ + result = hfs_setxattr_internal (cp, user_data_ptr, attrsize, ap, VTOHFS(vp), 0); + + exit: + if (cp) { + hfs_unlock(cp); + } + if (user_data_ptr) { + hfs_free(user_data_ptr, attrsize); + } + + return (result == btNotFound ? ENOATTR : MacToVFSError(result)); +} + +// Has same limitations as hfs_setxattr_internal below +int hfs_xattr_write(vnode_t vp, const char *name, const void *data, size_t size) +{ + struct vnop_setxattr_args args = { + .a_vp = vp, + .a_name = name, + }; + + return hfs_setxattr_internal(VTOC(vp), data, size, &args, VTOHFS(vp), 0); +} + +/* + * hfs_setxattr_internal + * + * Internal function to set non-rsrc, non-finderinfo EAs to either the attribute B-Tree or + * extent-based EAs. + * + * See comments from hfs_getxattr_internal on why we need to pass 'hfsmp' and fileid here. + * The gist is that we could end up writing to the root folder which may not have a cnode. + * + * Assumptions: + * 1. cnode 'cp' is locked EXCLUSIVE before calling this function. + * 2. data_ptr contains data to be written. If gathering data from userland, this must be + * done before calling this function. + * 3. If data originates entirely in-kernel, use a null UIO, and ensure the size is less than + * hfsmp->hfs_max_inline_attrsize bytes long. 
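+ *
+ * In-kernel callers can use the hfs_xattr_write() wrapper above (which
+ * shares these assumptions) instead of filling out a vnop_setxattr_args
+ * structure by hand.  A sketch of such a call, where the attribute name
+ * and buffer are only examples:
+ *
+ *		error = hfs_xattr_write(vp, "com.example.attr", buf, buflen);
+ *
+ * Per assumption 3, buflen must stay below hfsmp->hfs_max_inline_attrsize
+ * since no UIO is supplied by that wrapper.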
+ */ +int hfs_setxattr_internal (struct cnode *cp, const void *data_ptr, size_t attrsize, + struct vnop_setxattr_args *ap, struct hfsmount *hfsmp, + u_int32_t fileid) +{ + uio_t uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + int started_transaction = 0; + struct BTreeIterator * iterator = NULL; + struct filefork *btfile = NULL; + FSBufferDescriptor btdata; + HFSPlusAttrRecord attrdata; /* 90 bytes */ + HFSPlusAttrRecord *recp = NULL; + size_t recp_size = 0; + HFSPlusExtentDescriptor *extentptr = NULL; + size_t extentbufsize = 0; + int result = 0; + int lockflags = 0; + int exists = 0; + int allocatedblks = 0; + u_int32_t target_id; + + if (cp) { + target_id = cp->c_fileid; + } else { + target_id = fileid; + } + + /* Start a transaction for our changes. */ + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + started_transaction = 1; + + /* + * Once we started the transaction, nobody can compete + * with us, so make sure this file is still there. + */ + if ((cp) && (cp->c_flag & C_NOEXISTS)) { + result = ENOENT; + goto exit; + } + + /* + * If there isn't an attributes b-tree then create one. + */ + if (hfsmp->hfs_attribute_vp == NULL) { + result = hfs_create_attr_btree(hfsmp, ATTRIBUTE_FILE_NODE_SIZE, + getnodecount(hfsmp, ATTRIBUTE_FILE_NODE_SIZE)); + if (result) { + goto exit; + } + } + if (hfsmp->hfs_max_inline_attrsize == 0) { + hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp); + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + /* Build the b-tree key. */ + iterator = hfs_mallocz(sizeof(*iterator)); + result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto exit; + } + + /* Preflight for replace/create semantics. */ + btfile = VTOF(hfsmp->hfs_attribute_vp); + btdata.bufferAddress = &attrdata; + btdata.itemSize = sizeof(attrdata); + btdata.itemCount = 1; + exists = BTSearchRecord(btfile, iterator, &btdata, NULL, NULL) == 0; + + /* Replace requires that the attribute already exists. */ + if ((ap->a_options & XATTR_REPLACE) && !exists) { + result = ENOATTR; + goto exit; + } + /* Create requires that the attribute doesn't exist. */ + if ((ap->a_options & XATTR_CREATE) && exists) { + result = EEXIST; + goto exit; + } + + /* If it won't fit inline then use extent-based attributes. */ + if (attrsize > hfsmp->hfs_max_inline_attrsize) { + int blkcnt; + int extentblks; + u_int32_t *keystartblk; + int i; + + if (uio == NULL) { + /* + * setxattrs originating from in-kernel are not supported if they are bigger + * than the inline max size. Just return ENOATTR and force them to do it with a + * smaller EA. + */ + result = EPERM; + goto exit; + } + + /* Get some blocks. */ + blkcnt = howmany(attrsize, hfsmp->blockSize); + extentbufsize = blkcnt * sizeof(HFSPlusExtentDescriptor); + extentbufsize = roundup(extentbufsize, sizeof(HFSPlusExtentRecord)); + extentptr = hfs_mallocz(extentbufsize); + result = alloc_attr_blks(hfsmp, attrsize, extentbufsize, extentptr, &allocatedblks); + if (result) { + allocatedblks = 0; + goto exit; /* no more space */ + } + /* Copy data into the blocks. */ + result = write_attr_data(hfsmp, uio, attrsize, extentptr); + if (result) { + if (vp) { + const char *name = vnode_getname(vp); + printf("hfs_setxattr: write_attr_data vol=%s err (%d) %s:%s\n", + hfsmp->vcbVN, result, name ? name : "", ap->a_name); + if (name) + vnode_putname(name); + } + goto exit; + } + + /* Now remove any previous attribute. 
*/ + if (exists) { + result = remove_attribute_records(hfsmp, iterator); + if (result) { + if (vp) { + const char *name = vnode_getname(vp); + printf("hfs_setxattr: remove_attribute_records vol=%s err (%d) %s:%s\n", + hfsmp->vcbVN, result, name ? name : "", ap->a_name); + if (name) + vnode_putname(name); + } + goto exit; + } + } + /* Create attribute fork data record. */ + recp = hfs_malloc(recp_size = sizeof(HFSPlusAttrRecord)); + + btdata.bufferAddress = recp; + btdata.itemCount = 1; + btdata.itemSize = sizeof(HFSPlusAttrForkData); + + recp->recordType = kHFSPlusAttrForkData; + recp->forkData.reserved = 0; + recp->forkData.theFork.logicalSize = attrsize; + recp->forkData.theFork.clumpSize = 0; + recp->forkData.theFork.totalBlocks = blkcnt; + bcopy(extentptr, recp->forkData.theFork.extents, sizeof(HFSPlusExtentRecord)); + + (void) hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + + result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); + if (result) { + printf ("hfs_setxattr: BTInsertRecord(): vol=%s %d,%s err=%d\n", + hfsmp->vcbVN, target_id, ap->a_name, result); + goto exit; + } + extentblks = count_extent_blocks(blkcnt, recp->forkData.theFork.extents); + blkcnt -= extentblks; + keystartblk = &((HFSPlusAttrKey *)&iterator->key)->startBlock; + i = 0; + + /* Create overflow extents as needed. */ + while (blkcnt > 0) { + /* Initialize the key and record. */ + *keystartblk += (u_int32_t)extentblks; + btdata.itemSize = sizeof(HFSPlusAttrExtents); + recp->recordType = kHFSPlusAttrExtents; + recp->overflowExtents.reserved = 0; + + /* Copy the next set of extents. */ + i += kHFSPlusExtentDensity; + bcopy(&extentptr[i], recp->overflowExtents.extents, sizeof(HFSPlusExtentRecord)); + + result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); + if (result) { + printf ("hfs_setxattr: BTInsertRecord() overflow: vol=%s %d,%s err=%d\n", + hfsmp->vcbVN, target_id, ap->a_name, result); + goto exit; + } + extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents); + blkcnt -= extentblks; + } + } else { /* Inline data */ + if (exists) { + result = remove_attribute_records(hfsmp, iterator); + if (result) { + goto exit; + } + } + + /* Calculate size of record rounded up to multiple of 2 bytes. */ + btdata.itemSize = sizeof(HFSPlusAttrData) - 2 + attrsize + ((attrsize & 1) ? 1 : 0); + recp = hfs_malloc(recp_size = btdata.itemSize); + + recp->recordType = kHFSPlusAttrInlineData; + recp->attrData.reserved[0] = 0; + recp->attrData.reserved[1] = 0; + recp->attrData.attrSize = attrsize; + + /* Copy in the attribute data (if any). */ + if (attrsize > 0) { + if (data_ptr) { + bcopy(data_ptr, &recp->attrData.attrData, attrsize); + } else { + /* + * A null UIO meant it originated in-kernel. If they didn't supply data_ptr + * then deny the copy operation. + */ + if (uio == NULL) { + result = EPERM; + goto exit; + } + result = uiomove((caddr_t)&recp->attrData.attrData, attrsize, uio); + } + + if (result) { + goto exit; + } + } + + (void) hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + + btdata.bufferAddress = recp; + btdata.itemCount = 1; + result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); + } + +exit: + if (btfile && started_transaction) { + (void) BTFlushPath(btfile); + } + hfs_systemfile_unlock(hfsmp, lockflags); + if (result == 0) { + if (vp) { + cp = VTOC(vp); + /* Setting an attribute only updates change time and not + * modified time of the file. 
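+ * Hence only c_touch_chgtime is set below.  The record is also
+ * flagged kHFSHasAttributesMask (and kHFSHasSecurityMask when the
+ * ACL attribute KAUTH_FILESEC_XATTR is written) before hfs_update()
+ * pushes the change out.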
+ */ + cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_MODIFIED; + cp->c_attr.ca_recflags |= kHFSHasAttributesMask; + if ((strcmp(ap->a_name, KAUTH_FILESEC_XATTR) == 0)) { + cp->c_attr.ca_recflags |= kHFSHasSecurityMask; + } + (void) hfs_update(vp, 0); + } + } + if (started_transaction) { + if (result && allocatedblks) { + free_attr_blks(hfsmp, allocatedblks, extentptr); + } + hfs_end_transaction(hfsmp); + } + + hfs_free(recp, recp_size); + hfs_free(extentptr, extentbufsize); + hfs_free(iterator, sizeof(*iterator)); + + return result; +} + + + + +/* + * Remove an extended attribute. + */ +int +hfs_vnop_removexattr(struct vnop_removexattr_args *ap) +/* + struct vnop_removexattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + char * a_name; + int a_options; + vfs_context_t a_context; + }; +*/ +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp; + struct BTreeIterator * iterator = NULL; + int lockflags; + int result; + time_t orig_ctime=VTOC(vp)->c_ctime; + + if (ap->a_name == NULL || ap->a_name[0] == '\0') { + return (EINVAL); /* invalid name */ + } + hfsmp = VTOHFS(vp); + if (VNODE_IS_RSRC(vp)) { + return (EPERM); + } + +#if HFS_COMPRESSION + if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) && !(ap->a_options & XATTR_SHOWCOMPRESSION)) { + return ENOATTR; + } +#endif /* HFS_COMPRESSION */ + + nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_DELETE_OP, NSPACE_REARM_NO_ARG); + + /* If Resource Fork is non-empty then truncate it. */ + if (strcmp(ap->a_name, XATTR_RESOURCEFORK_NAME) == 0) { + struct vnode *rvp = NULL; + + if ( !vnode_isreg(vp) ) { + return (EPERM); + } + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + if (!hfs_has_rsrc(cp)) { + hfs_unlock(cp); + return (ENOATTR); + } + result = hfs_vgetrsrc(hfsmp, vp, &rvp); + hfs_unlock(cp); + if (result) { + return (result); + } + + hfs_lock_truncate(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + // Tell UBC now before we take the cnode lock and start the transaction + hfs_ubc_setsize(rvp, 0, false); + + if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + vnode_put(rvp); + return (result); + } + + /* Start a transaction for encapsulating changes in + * hfs_truncate() and hfs_update() + */ + if ((result = hfs_start_transaction(hfsmp))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + hfs_unlock(cp); + vnode_put(rvp); + return (result); + } + + result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, ap->a_context); + if (result == 0) { + cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_MODIFIED; + result = hfs_update(vp, 0); + } + + hfs_end_transaction(hfsmp); + hfs_unlock_truncate(VTOC(rvp), HFS_LOCK_DEFAULT); + hfs_unlock(VTOC(rvp)); + + vnode_put(rvp); + return (result); + } + /* Clear out the Finder Info. */ + if (strcmp(ap->a_name, XATTR_FINDERINFO_NAME) == 0) { + void * finderinfo_start; + int finderinfo_size; + u_int8_t finderinfo[32]; + u_int32_t date_added = 0, write_gen_counter = 0, document_id = 0; + u_int8_t *finfo = NULL; + + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + return (result); + } + + /* Use the local copy to store our temporary changes. */ + bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo)); + + + /* Zero out the date added field in the local copy */ + hfs_zero_hidden_fields (cp, finderinfo); + + /* Don't expose a symlink's private type/creator. 
*/ + if (vnode_islnk(vp)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)&finderinfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + + /* Do the byte compare against the local copy */ + if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) { + hfs_unlock(cp); + return (ENOATTR); + } + + /* + * If there was other content, zero out everything except + * type/creator and date added. First, save the date added. + */ + finfo = cp->c_finderinfo; + finfo = finfo + 16; + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + date_added = extinfo->date_added; + write_gen_counter = extinfo->write_gen_counter; + document_id = extinfo->document_id; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + date_added = extinfo->date_added; + write_gen_counter = extinfo->write_gen_counter; + document_id = extinfo->document_id; + } + + if (vnode_islnk(vp)) { + /* Ignore type/creator */ + finderinfo_start = &cp->c_finderinfo[8]; + finderinfo_size = sizeof(cp->c_finderinfo) - 8; + } else { + finderinfo_start = &cp->c_finderinfo[0]; + finderinfo_size = sizeof(cp->c_finderinfo); + } + bzero(finderinfo_start, finderinfo_size); + + + /* Now restore the date added */ + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->date_added = date_added; + extinfo->write_gen_counter = write_gen_counter; + extinfo->document_id = document_id; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->date_added = date_added; + extinfo->write_gen_counter = write_gen_counter; + extinfo->document_id = document_id; + } + + /* Updating finderInfo updates change time and modified time */ + cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_MODIFIED; + hfs_update(vp, 0); + + hfs_unlock(cp); + + return (0); + } +#if CONFIG_HFS_STD + /* + * Standard HFS only supports native FinderInfo and Resource Forks. 
+ */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (EPERM); + } +#endif + if (hfsmp->hfs_attribute_vp == NULL) { + return (ENOATTR); + } + + iterator = hfs_mallocz(sizeof(*iterator)); + + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + goto exit_nolock; + } + + result = hfs_buildattrkey(cp->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto exit; + } + + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + result = remove_attribute_records(hfsmp, iterator); + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (result == 0) { + cp->c_touch_chgtime = TRUE; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + /* If no more attributes exist, clear attribute bit */ + result = file_attribute_exist(hfsmp, cp->c_fileid); + if (result == 0) { + cp->c_attr.ca_recflags &= ~kHFSHasAttributesMask; + cp->c_flag |= C_MODIFIED; + } + if (result == EEXIST) { + result = 0; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + /* If ACL was removed, clear security bit */ + if (strcmp(ap->a_name, KAUTH_FILESEC_XATTR) == 0) { + cp->c_attr.ca_recflags &= ~kHFSHasSecurityMask; + cp->c_flag |= C_MODIFIED; + } + (void) hfs_update(vp, 0); + } + + hfs_end_transaction(hfsmp); +exit: + hfs_unlock(cp); +exit_nolock: + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + +/* + * Removes a non rsrc-fork, non-finderinfo EA from the specified file ID. + * Note that this results in a bit of code duplication for the xattr removal + * path. This is done because it's a bit messy to deal with things without the + * cnode. This function is used by exchangedata to port XATTRS to alternate + * fileIDs while everything is locked, and the cnodes are in a transitional state. + * + * Assumes that the cnode backing the fileid specified is LOCKED. + */ + +int +hfs_removexattr_by_id (struct hfsmount *hfsmp, uint32_t fileid, const char *xattr_name ) { + struct BTreeIterator iter; // allocated on the stack to avoid heap allocation mid-txn + int ret = 0; + int started_txn = 0; + int lockflags; + + memset (&iter, 0, sizeof(iter)); + + //position the B-Tree iter key before grabbing locks and starting a txn + ret = hfs_buildattrkey (fileid, xattr_name, (HFSPlusAttrKey*)&iter.key); + if (ret) { + goto xattr_out; + } + + //note: this is likely a nested transaction since there is a global transaction cover + if (hfs_start_transaction (hfsmp) != 0) { + ret = EINVAL; + goto xattr_out; + } + started_txn = 1; + + + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + //actually remove the EA from the tree + ret = remove_attribute_records(hfsmp, &iter); + + hfs_systemfile_unlock(hfsmp, lockflags); + + /* + * NOTE: Responsibility of the caller to remove the "has XATTRs" bit in the catalog record + * if this was the last EA. + */ + + +xattr_out: + if (started_txn) { + hfs_end_transaction(hfsmp); + } + + return MacToVFSError(ret); + +} + + +/* Check if any attribute record exist for given fileID. This function + * is called by hfs_vnop_removexattr to determine if it should clear the + * attribute bit in the catalog record or not. + * + * Note - you must acquire a shared lock on the attribute btree before + * calling this function. 
+ * + * Output: + * EEXIST - If attribute record was found + * 0 - Attribute was not found + * (other) - Other error (such as EIO) + */ +int +file_attribute_exist(struct hfsmount *hfsmp, uint32_t fileID) +{ + HFSPlusAttrKey *key; + struct BTreeIterator * iterator = NULL; + struct filefork *btfile; + int result = 0; + + // if there's no attribute b-tree we sure as heck + // can't have any attributes! + if (hfsmp->hfs_attribute_vp == NULL) { + return false; + } + + iterator = hfs_mallocz(sizeof(*iterator)); + + key = (HFSPlusAttrKey *)&iterator->key; + + result = hfs_buildattrkey(fileID, NULL, key); + if (result) { + goto out; + } + + btfile = VTOF(hfsmp->hfs_attribute_vp); + result = BTSearchRecord(btfile, iterator, NULL, NULL, NULL); + if (result && (result != btNotFound)) { + goto out; + } + + result = BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL); + /* If no next record was found or fileID for next record did not match, + * no more attributes exist for this fileID + */ + if ((result && (result == btNotFound)) || (key->fileID != fileID)) { + result = 0; + } else { + result = EEXIST; + } + +out: + hfs_free(iterator, sizeof(*iterator)); + return result; +} + + +/* + * Remove all the records for a given attribute. + * + * - Used by hfs_vnop_removexattr, hfs_vnop_setxattr and hfs_removeallattr. + * - A transaction must have been started. + * - The Attribute b-tree file must be locked exclusive. + * - The Allocation Bitmap file must be locked exclusive. + * - The iterator key must be initialized. + */ +int +remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) +{ + struct filefork *btfile; + FSBufferDescriptor btdata; + HFSPlusAttrRecord attrdata; /* 90 bytes */ + u_int16_t datasize; + int result; + + btfile = VTOF(hfsmp->hfs_attribute_vp); + + btdata.bufferAddress = &attrdata; + btdata.itemSize = sizeof(attrdata); + btdata.itemCount = 1; + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + if (result) { + goto exit; /* no records. */ + } + /* + * Free the blocks from extent based attributes. + * + * Note that the block references (btree records) are removed + * before releasing the blocks in the allocation bitmap. + */ + if (attrdata.recordType == kHFSPlusAttrForkData) { + int totalblks; + int extentblks; + u_int32_t *keystartblk; + + if (datasize < sizeof(HFSPlusAttrForkData)) { + printf("hfs: remove_attribute_records: bad record size %d (expecting %lu)\n", datasize, sizeof(HFSPlusAttrForkData)); + } + totalblks = attrdata.forkData.theFork.totalBlocks; + + /* Process the first 8 extents. */ + extentblks = count_extent_blocks(totalblks, attrdata.forkData.theFork.extents); + if (extentblks > totalblks) + panic("hfs: remove_attribute_records: corruption..."); + if (BTDeleteRecord(btfile, iterator) == 0) { + free_attr_blks(hfsmp, extentblks, attrdata.forkData.theFork.extents); + } + totalblks -= extentblks; + keystartblk = &((HFSPlusAttrKey *)&iterator->key)->startBlock; + + /* Process any overflow extents. */ + while (totalblks) { + *keystartblk += (u_int32_t)extentblks; + + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + if (result || + (attrdata.recordType != kHFSPlusAttrExtents) || + (datasize < sizeof(HFSPlusAttrExtents))) { + printf("hfs: remove_attribute_records: BTSearchRecord: vol=%s, err=%d (%d), totalblks %d\n", + hfsmp->vcbVN, MacToVFSError(result), attrdata.recordType != kHFSPlusAttrExtents, totalblks); + result = ENOATTR; + break; /* break from while */ + } + /* Process the next 8 extents. 
*/ + extentblks = count_extent_blocks(totalblks, attrdata.overflowExtents.extents); + if (extentblks > totalblks) + panic("hfs: remove_attribute_records: corruption..."); + if (BTDeleteRecord(btfile, iterator) == 0) { + free_attr_blks(hfsmp, extentblks, attrdata.overflowExtents.extents); + } + totalblks -= extentblks; + } + } else { + result = BTDeleteRecord(btfile, iterator); + } + (void) BTFlushPath(btfile); +exit: + return (result == btNotFound ? ENOATTR : MacToVFSError(result)); +} + + +/* + * Retrieve the list of extended attribute names. + */ +int +hfs_vnop_listxattr(struct vnop_listxattr_args *ap) +/* + struct vnop_listxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + uio_t a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; +*/ +{ + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp; + uio_t uio = ap->a_uio; + struct BTreeIterator * iterator = NULL; + struct filefork *btfile; + struct listattr_callback_state state; + user_addr_t user_start = 0; + user_size_t user_len = 0; + int lockflags; + int result; + u_int8_t finderinfo[32]; + + + if (VNODE_IS_RSRC(vp)) { + return (EPERM); + } + +#if HFS_COMPRESSION + int compressed = hfs_file_is_compressed(cp, 1); /* 1 == don't take the cnode lock */ +#endif /* HFS_COMPRESSION */ + + hfsmp = VTOHFS(vp); + *ap->a_size = 0; + + /* + * Take the truncate lock; this serializes us against the ioctl + * to truncate data & reset the decmpfs state + * in the compressed file handler. + */ + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + + /* Now the regular cnode lock (shared) */ + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return (result); + } + + /* + * Make a copy of the cnode's finderinfo to a local so we can + * zero out the date added field. Also zero out the private type/creator + * for symlinks. + */ + bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo)); + hfs_zero_hidden_fields (cp, finderinfo); + + /* Don't expose a symlink's private type/creator. */ + if (vnode_islnk(vp)) { + struct FndrFileInfo *fip; + + fip = (struct FndrFileInfo *)&finderinfo; + fip->fdType = 0; + fip->fdCreator = 0; + } + + + /* If Finder Info is non-empty then export it's name. */ + if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) != 0) { + if (uio == NULL) { + *ap->a_size += sizeof(XATTR_FINDERINFO_NAME); + } else if ((user_size_t)uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) { + result = ERANGE; + goto exit; + } else { + result = uiomove(XATTR_FINDERINFO_NAME, + sizeof(XATTR_FINDERINFO_NAME), uio); + if (result) + goto exit; + } + } + /* If Resource Fork is non-empty then export it's name. */ + if (S_ISREG(cp->c_mode) && hfs_has_rsrc(cp)) { +#if HFS_COMPRESSION + if ((ap->a_options & XATTR_SHOWCOMPRESSION) || + !compressed || + !decmpfs_hides_rsrc(ap->a_context, VTOCMP(vp)) + ) +#endif /* HFS_COMPRESSION */ + { + if (uio == NULL) { + *ap->a_size += sizeof(XATTR_RESOURCEFORK_NAME); + } else if ((user_size_t)uio_resid(uio) < sizeof(XATTR_RESOURCEFORK_NAME)) { + result = ERANGE; + goto exit; + } else { + result = uiomove(XATTR_RESOURCEFORK_NAME, + sizeof(XATTR_RESOURCEFORK_NAME), uio); + if (result) + goto exit; + } + } + } +#if CONFIG_HFS_STD + /* + * Standard HFS only supports native FinderInfo and Resource Forks. + * Return at this point. + */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + result = 0; + goto exit; + } +#endif + /* Bail if we don't have any extended attributes. 
*/ + if ((hfsmp->hfs_attribute_vp == NULL) || + (cp->c_attr.ca_recflags & kHFSHasAttributesMask) == 0) { + result = 0; + goto exit; + } + btfile = VTOF(hfsmp->hfs_attribute_vp); + + iterator = hfs_mallocz(sizeof(*iterator)); + + result = hfs_buildattrkey(cp->c_fileid, NULL, (HFSPlusAttrKey *)&iterator->key); + if (result) + goto exit; + + /* + * Lock the user's buffer here so that we won't fault on + * it in uiomove while holding the attributes b-tree lock. + */ + if (uio && uio_isuserspace(uio)) { + user_start = uio_curriovbase(uio); + user_len = uio_curriovlen(uio); + + if ((result = vslock(user_start, user_len)) != 0) { + user_start = 0; + goto exit; + } + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + result = BTSearchRecord(btfile, iterator, NULL, NULL, NULL); + if (result && result != btNotFound) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto exit; + } + + state.fileID = cp->c_fileid; + state.result = 0; + state.uio = uio; + state.size = 0; +#if HFS_COMPRESSION + state.showcompressed = !compressed || ap->a_options & XATTR_SHOWCOMPRESSION; + state.ctx = ap->a_context; + state.vp = vp; +#endif /* HFS_COMPRESSION */ + + /* + * Process entries starting just after iterator->key. + */ + result = BTIterateRecords(btfile, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)listattr_callback, &state); + hfs_systemfile_unlock(hfsmp, lockflags); + if (uio == NULL) { + *ap->a_size += state.size; + } + + if (state.result || result == btNotFound) + result = state.result; + +exit: + if (user_start) { + vsunlock(user_start, user_len, TRUE); + } + hfs_free(iterator, sizeof(*iterator)); + hfs_unlock(cp); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + + return MacToVFSError(result); +} + + +/* + * Callback - called for each attribute record + */ +static int +listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *data, struct listattr_callback_state *state) +{ + char attrname[XATTR_MAXNAMELEN + 1]; + ssize_t bytecount; + int result; + + if (state->fileID != key->fileID) { + state->result = 0; + return (0); /* stop */ + } + /* + * Skip over non-primary keys + */ + if (key->startBlock != 0) { + return (1); /* continue */ + } + + /* Convert the attribute name into UTF-8. */ + result = utf8_encodestr(key->attrName, key->attrNameLen * sizeof(UniChar), + (u_int8_t *)attrname, (size_t *)&bytecount, sizeof(attrname), '/', 0); + if (result) { + state->result = result; + return (0); /* stop */ + } + bytecount++; /* account for null termination char */ + + if (xattr_protected(attrname)) + return (1); /* continue */ + +#if HFS_COMPRESSION + if (!state->showcompressed && decmpfs_hides_xattr(state->ctx, VTOCMP(state->vp), attrname) ) + return 1; /* continue */ +#endif /* HFS_COMPRESSION */ + + if (state->uio == NULL) { + state->size += bytecount; + } else { + if (bytecount > uio_resid(state->uio)) { + state->result = ERANGE; + return (0); /* stop */ + } + result = uiomove((caddr_t) attrname, bytecount, state->uio); + if (result) { + state->result = result; + return (0); /* stop */ + } + } + return (1); /* continue */ +} + +/* + * Remove all the attributes from a cnode. + * + * This function creates/ends its own transaction so that each + * attribute is deleted in its own transaction (to avoid having + * a transaction grow too large). + * + * This function takes the necessary locks on the attribute + * b-tree file and the allocation (bitmap) file. + * + * NOTE: Upon sucecss, this function will return with an open + * transaction. 
The reason we do it this way is because when we + * delete the last attribute, we must make sure the flag in the + * catalog record that indicates there are no more records is cleared. + * The caller is responsible for doing this and *must* do it before + * ending the transaction. + */ +int +hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid, + bool *open_transaction) +{ + BTreeIterator *iterator = NULL; + HFSPlusAttrKey *key; + struct filefork *btfile; + int result, lockflags = 0; + + *open_transaction = false; + + if (hfsmp->hfs_attribute_vp == NULL) + return 0; + + btfile = VTOF(hfsmp->hfs_attribute_vp); + + iterator = hfs_mallocz(sizeof(BTreeIterator)); + + key = (HFSPlusAttrKey *)&iterator->key; + + /* Loop until there are no more attributes for this file id */ + do { + if (!*open_transaction) + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + + (void) hfs_buildattrkey(fileid, NULL, key); + result = BTIterateRecord(btfile, kBTreeNextRecord, iterator, NULL, NULL); + if (result || key->fileID != fileid) + goto exit; + + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + if (*open_transaction) { + hfs_end_transaction(hfsmp); + *open_transaction = false; + } + + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + + *open_transaction = true; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + result = remove_attribute_records(hfsmp, iterator); + +#if HFS_XATTR_VERBOSE + if (result) { + printf("hfs_removeallattr: unexpected err %d\n", result); + } +#endif + } while (!result); + +exit: + hfs_free(iterator, sizeof(*iterator)); + + if (lockflags) + hfs_systemfile_unlock(hfsmp, lockflags); + + result = result == btNotFound ? 0 : MacToVFSError(result); + + if (result && *open_transaction) { + hfs_end_transaction(hfsmp); + *open_transaction = false; + } + + return result; +} + +void +hfs_xattr_init(struct hfsmount * hfsmp) +{ +#if CONFIG_HFS_STD + if (ISSET(hfsmp->hfs_flags, HFS_STANDARD)) + return; +#endif + + /* + * If there isn't an attributes b-tree then create one. + */ + if ((hfsmp->hfs_attribute_vp == NULL) && + !(hfsmp->hfs_flags & HFS_READ_ONLY)) { + (void) hfs_create_attr_btree(hfsmp, ATTRIBUTE_FILE_NODE_SIZE, + getnodecount(hfsmp, ATTRIBUTE_FILE_NODE_SIZE)); + } + if (hfsmp->hfs_attribute_vp) + hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp); +} + +/* + * Enable/Disable volume attributes stored as EA for root file system. + * Supported attributes are - + * 1. Extent-based Extended Attributes + */ +int +hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state) +{ + struct BTreeIterator * iterator = NULL; + struct filefork *btfile; + int lockflags; + int result; + +#if CONFIG_HFS_STD + if (hfsmp->hfs_flags & HFS_STANDARD) { + return (ENOTSUP); + } +#endif + if (xattrtype != HFSIOC_SET_XATTREXTENTS_STATE) { + return EINVAL; + } + + /* + * If there isn't an attributes b-tree then create one. + */ + if (hfsmp->hfs_attribute_vp == NULL) { + result = hfs_create_attr_btree(hfsmp, ATTRIBUTE_FILE_NODE_SIZE, + getnodecount(hfsmp, ATTRIBUTE_FILE_NODE_SIZE)); + if (result) { + return (result); + } + } + + iterator = hfs_mallocz(sizeof(*iterator)); + + /* + * Build a b-tree key. + * We use the root's parent id (1) to hold this volume attribute. + */ + (void) hfs_buildattrkey(kHFSRootParentID, XATTR_XATTREXTENTS_NAME, + (HFSPlusAttrKey *)&iterator->key); + + /* Start a transaction for our changes. 
*/ + if (hfs_start_transaction(hfsmp) != 0) { + result = EINVAL; + goto exit; + } + btfile = VTOF(hfsmp->hfs_attribute_vp); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + + if (state == 0) { + /* Remove the attribute. */ + result = BTDeleteRecord(btfile, iterator); + if (result == btNotFound) + result = 0; + } else { + FSBufferDescriptor btdata; + HFSPlusAttrData attrdata; + u_int16_t datasize; + + datasize = sizeof(attrdata); + btdata.bufferAddress = &attrdata; + btdata.itemSize = datasize; + btdata.itemCount = 1; + attrdata.recordType = kHFSPlusAttrInlineData; + attrdata.reserved[0] = 0; + attrdata.reserved[1] = 0; + attrdata.attrSize = 2; + attrdata.attrData[0] = 0; + attrdata.attrData[1] = 0; + + /* Insert the attribute. */ + result = BTInsertRecord(btfile, iterator, &btdata, datasize); + if (result == btExists) + result = 0; + } + (void) BTFlushPath(btfile); + + hfs_systemfile_unlock(hfsmp, lockflags); + + /* Finish the transaction of our changes. */ + hfs_end_transaction(hfsmp); + + /* Update the state in the mount point */ + hfs_lock_mount (hfsmp); + if (state == 0) { + hfsmp->hfs_flags &= ~HFS_XATTR_EXTENTS; + } else { + hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; + } + hfs_unlock_mount (hfsmp); + +exit: + hfs_free(iterator, sizeof(*iterator)); + return MacToVFSError(result); +} + + +/* + * hfs_attrkeycompare - compare two attribute b-tree keys. + * + * The name portion of the key is compared using a 16-bit binary comparison. + * This is called from the b-tree code. + */ +int +hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey) +{ + u_int32_t searchFileID, trialFileID; + int result; + + searchFileID = searchKey->fileID; + trialFileID = trialKey->fileID; + result = 0; + + if (searchFileID > trialFileID) { + ++result; + } else if (searchFileID < trialFileID) { + --result; + } else { + u_int16_t * str1 = &searchKey->attrName[0]; + u_int16_t * str2 = &trialKey->attrName[0]; + int length1 = searchKey->attrNameLen; + int length2 = trialKey->attrNameLen; + u_int16_t c1, c2; + int length; + + if (length1 < length2) { + length = length1; + --result; + } else if (length1 > length2) { + length = length2; + ++result; + } else { + length = length1; + } + + while (length--) { + c1 = *(str1++); + c2 = *(str2++); + + if (c1 > c2) { + result = 1; + break; + } + if (c1 < c2) { + result = -1; + break; + } + } + if (result) + return (result); + /* + * Names are equal; compare startBlock + */ + if (searchKey->startBlock == trialKey->startBlock) { + return (0); + } else { + return (searchKey->startBlock < trialKey->startBlock ? -1 : 1); + } + } + + return result; +} + + +/* + * hfs_buildattrkey - build an Attribute b-tree key + */ +int +hfs_buildattrkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key) +{ + int result = 0; + size_t unicodeBytes = 0; + + if (attrname != NULL) { + /* + * Convert filename from UTF-8 into Unicode + */ + result = utf8_decodestr((const u_int8_t *)attrname, strlen(attrname), key->attrName, + &unicodeBytes, sizeof(key->attrName), 0, 0); + if (result) { + if (result != ENAMETOOLONG) + result = EINVAL; /* name has invalid characters */ + return (result); + } + key->attrNameLen = unicodeBytes / sizeof(UniChar); + key->keyLength = kHFSPlusAttrKeyMinimumLength + unicodeBytes; + } else { + key->attrNameLen = 0; + key->keyLength = kHFSPlusAttrKeyMinimumLength; + } + key->pad = 0; + key->fileID = fileID; + key->startBlock = 0; + + return (0); + } + +/* + * getnodecount - calculate starting node count for attributes b-tree. 
+ */ +static int +getnodecount(struct hfsmount *hfsmp, size_t nodesize) +{ + u_int64_t freebytes; + u_int64_t calcbytes; + + /* + * 10.4: Scale base on current catalog file size (20 %) up to 20 MB. + * 10.5: Attempt to be as big as the catalog clump size. + * + * Use no more than 10 % of the remaining free space. + */ + freebytes = (u_int64_t)hfs_freeblks(hfsmp, 0) * (u_int64_t)hfsmp->blockSize; + + calcbytes = MIN(hfsmp->hfs_catalog_cp->c_datafork->ff_size / 5, 20 * 1024 * 1024); + + calcbytes = MAX(calcbytes, hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize); + + calcbytes = MIN(calcbytes, freebytes / 10); + + return (MAX(2, (int)(calcbytes / nodesize))); +} + + +/* + * getmaxinlineattrsize - calculate maximum inline attribute size. + * + * This yields 3,802 bytes for an 8K node size. + */ +static size_t +getmaxinlineattrsize(struct vnode * attrvp) +{ + struct BTreeInfoRec btinfo; + size_t nodesize = ATTRIBUTE_FILE_NODE_SIZE; + size_t maxsize; + + if (attrvp != NULL) { + (void) hfs_lock(VTOC(attrvp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + if (BTGetInformation(VTOF(attrvp), 0, &btinfo) == 0) + nodesize = btinfo.nodeSize; + hfs_unlock(VTOC(attrvp)); + } + maxsize = nodesize; + maxsize -= sizeof(BTNodeDescriptor); /* minus node descriptor */ + maxsize -= 3 * sizeof(u_int16_t); /* minus 3 index slots */ + maxsize /= 2; /* 2 key/rec pairs minumum */ + maxsize -= sizeof(HFSPlusAttrKey); /* minus maximum key size */ + maxsize -= sizeof(HFSPlusAttrData) - 2; /* minus data header */ + maxsize &= 0xFFFFFFFE; /* multiple of 2 bytes */ + + return (maxsize); +} + +/* + * Initialize vnode for attribute data I/O. + * + * On success, + * - returns zero + * - the attrdata vnode is initialized as hfsmp->hfs_attrdata_vp + * - an iocount is taken on the attrdata vnode which exists + * for the entire duration of the mount. It is only dropped + * during unmount + * - the attrdata cnode is not locked + * + * On failure, + * - returns non-zero value + * - the caller does not have to worry about any locks or references + */ +int init_attrdata_vnode(struct hfsmount *hfsmp) +{ + vnode_t vp; + int result = 0; + struct cat_desc cat_desc; + struct cat_attr cat_attr; + struct cat_fork cat_fork; + int newvnode_flags = 0; + + bzero(&cat_desc, sizeof(cat_desc)); + cat_desc.cd_parentcnid = kHFSRootParentID; + cat_desc.cd_nameptr = (const u_int8_t *)hfs_attrdatafilename; + cat_desc.cd_namelen = strlen(hfs_attrdatafilename); + cat_desc.cd_cnid = kHFSAttributeDataFileID; + /* Tag vnode as system file, note that we can still use cluster I/O */ + cat_desc.cd_flags |= CD_ISMETA; + + bzero(&cat_attr, sizeof(cat_attr)); + cat_attr.ca_linkcount = 1; + cat_attr.ca_mode = S_IFREG; + cat_attr.ca_fileid = cat_desc.cd_cnid; + cat_attr.ca_blocks = hfsmp->totalBlocks; + + /* + * The attribute data file is a virtual file that spans the + * entire file system space. + * + * Each extent-based attribute occupies a unique portion of + * in this virtual file. The cluster I/O is done using actual + * allocation block offsets so no additional mapping is needed + * for the VNOP_BLOCKMAP call. + * + * This approach allows the attribute data to be cached without + * incurring the high cost of using a separate vnode per attribute. + * + * Since we need to acquire the attribute b-tree file lock anyways, + * the virtual file doesn't introduce any additional serialization. 
+ */ + bzero(&cat_fork, sizeof(cat_fork)); + cat_fork.cf_size = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + cat_fork.cf_blocks = hfsmp->totalBlocks; + cat_fork.cf_extents[0].startBlock = 0; + cat_fork.cf_extents[0].blockCount = cat_fork.cf_blocks; + + result = hfs_getnewvnode(hfsmp, NULL, NULL, &cat_desc, 0, &cat_attr, + &cat_fork, &vp, &newvnode_flags); + if (result == 0) { + hfsmp->hfs_attrdata_vp = vp; + hfs_unlock(VTOC(vp)); + } + return (result); +} + +/* + * Read an extent based attribute. + */ +static int +read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents) +{ + vnode_t evp = hfsmp->hfs_attrdata_vp; + int bufsize; + int64_t iosize; + int attrsize; + int blksize; + int i; + int result = 0; + + hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + + bufsize = (int)uio_resid(uio); + attrsize = (int)datasize; + blksize = (int)hfsmp->blockSize; + + /* + * Read the attribute data one extent at a time. + * For the typical case there is only one extent. + */ + for (i = 0; (attrsize > 0) && (bufsize > 0) && (extents[i].startBlock != 0); ++i) { + iosize = extents[i].blockCount * blksize; + iosize = MIN(iosize, attrsize); + iosize = MIN(iosize, bufsize); + uio_setresid(uio, iosize); + uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize); + + result = cluster_read(evp, uio, VTOF(evp)->ff_size, IO_SYNC | IO_UNIT); + +#if HFS_XATTR_VERBOSE + printf("hfs: read_attr_data: cr iosize %lld [%d, %d] (%d)\n", + iosize, extents[i].startBlock, extents[i].blockCount, result); +#endif + if (result) + break; + attrsize -= iosize; + bufsize -= iosize; + } + uio_setresid(uio, bufsize); + uio_setoffset(uio, datasize); + + hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); + return (result); +} + +/* + * Write an extent based attribute. + */ +static int +write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents) +{ + vnode_t evp = hfsmp->hfs_attrdata_vp; + off_t filesize; + int bufsize; + int attrsize; + int64_t iosize; + int blksize; + int i; + int result = 0; + + hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + + bufsize = uio_resid(uio); + attrsize = (int) datasize; + blksize = (int) hfsmp->blockSize; + filesize = VTOF(evp)->ff_size; + + /* + * Write the attribute data one extent at a time. + */ + for (i = 0; (attrsize > 0) && (bufsize > 0) && (extents[i].startBlock != 0); ++i) { + iosize = extents[i].blockCount * blksize; + iosize = MIN(iosize, attrsize); + iosize = MIN(iosize, bufsize); + uio_setresid(uio, iosize); + uio_setoffset(uio, (u_int64_t)extents[i].startBlock * (u_int64_t)blksize); + + result = cluster_write(evp, uio, filesize, filesize, filesize, + (off_t) 0, IO_SYNC | IO_UNIT); +#if HFS_XATTR_VERBOSE + printf("hfs: write_attr_data: cw iosize %lld [%d, %d] (%d)\n", + iosize, extents[i].startBlock, extents[i].blockCount, result); +#endif + if (result) + break; + attrsize -= iosize; + bufsize -= iosize; + } + uio_setresid(uio, bufsize); + uio_setoffset(uio, datasize); + + hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); + return (result); +} + +/* + * Allocate blocks for an extent based attribute. 
+ */ +static int +alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, HFSPlusExtentDescriptor *extents, int *blocks) +{ + int blkcnt; + int startblk; + int lockflags; + int i; + int maxextents; + int result = 0; + + startblk = hfsmp->hfs_metazone_end; + blkcnt = howmany(attrsize, hfsmp->blockSize); + if (blkcnt > (int)hfs_freeblks(hfsmp, 0)) { + return (ENOSPC); + } + *blocks = blkcnt; + maxextents = extentbufsize / sizeof(HFSPlusExtentDescriptor); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + for (i = 0; (blkcnt > 0) && (i < maxextents); i++) { + /* Try allocating and see if we find something decent */ + result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, 0, + &extents[i].startBlock, &extents[i].blockCount); + /* + * If we couldn't find anything, then re-try the allocation but allow + * journal flushes. + */ + if (result == dskFulErr) { + result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, HFS_ALLOC_FLUSHTXN, + &extents[i].startBlock, &extents[i].blockCount); + } + + +#if HFS_XATTR_VERBOSE + printf("hfs: alloc_attr_blks: BA blkcnt %d [%d, %d] (%d)\n", + blkcnt, extents[i].startBlock, extents[i].blockCount, result); +#endif + if (result) { + extents[i].startBlock = 0; + extents[i].blockCount = 0; + break; + } + blkcnt -= extents[i].blockCount; + startblk = extents[i].startBlock + extents[i].blockCount; + } + /* + * If it didn't fit in the extents buffer then bail. + */ + if (blkcnt) { + result = ENOSPC; + +#if HFS_XATTR_VERBOSE + printf("hfs: alloc_attr_blks: unexpected failure, %d blocks unallocated\n", blkcnt); +#endif + for (; i >= 0; i--) { + if ((blkcnt = extents[i].blockCount) != 0) { + (void) BlockDeallocate(hfsmp, extents[i].startBlock, blkcnt, 0); + extents[i].startBlock = 0; + extents[i].blockCount = 0; + } + } + } + + hfs_systemfile_unlock(hfsmp, lockflags); + return MacToVFSError(result); +} + +/* + * Release blocks from an extent based attribute. + */ +static void +free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *extents) +{ + vnode_t evp = hfsmp->hfs_attrdata_vp; + int remblks = blkcnt; + int lockflags; + int i; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + for (i = 0; (remblks > 0) && (extents[i].blockCount != 0); i++) { + if (extents[i].blockCount > (u_int32_t)blkcnt) { +#if HFS_XATTR_VERBOSE + printf("hfs: free_attr_blks: skipping bad extent [%d, %d]\n", + extents[i].startBlock, extents[i].blockCount); +#endif + extents[i].blockCount = 0; + continue; + } + if (extents[i].startBlock == 0) { + break; + } + (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount, 0); + remblks -= extents[i].blockCount; + extents[i].startBlock = 0; + extents[i].blockCount = 0; + +#if HFS_XATTR_VERBOSE + printf("hfs: free_attr_blks: BlockDeallocate [%d, %d]\n", + extents[i].startBlock, extents[i].blockCount); +#endif + /* Discard any resident pages for this block range. 
*/ + if (evp) { + off_t start, end; + + start = (u_int64_t)extents[i].startBlock * (u_int64_t)hfsmp->blockSize; + end = start + (u_int64_t)extents[i].blockCount * (u_int64_t)hfsmp->blockSize; + (void) ubc_msync(hfsmp->hfs_attrdata_vp, start, end, &start, UBC_INVALIDATE); + } + } + + hfs_systemfile_unlock(hfsmp, lockflags); +} + +static int +has_overflow_extents(HFSPlusForkData *forkdata) +{ + u_int32_t blocks; + + if (forkdata->extents[7].blockCount == 0) + return (0); + + blocks = forkdata->extents[0].blockCount + + forkdata->extents[1].blockCount + + forkdata->extents[2].blockCount + + forkdata->extents[3].blockCount + + forkdata->extents[4].blockCount + + forkdata->extents[5].blockCount + + forkdata->extents[6].blockCount + + forkdata->extents[7].blockCount; + + return (forkdata->totalBlocks > blocks); +} + +static int +count_extent_blocks(int maxblks, HFSPlusExtentRecord extents) +{ + int blocks; + int i; + + for (i = 0, blocks = 0; i < kHFSPlusExtentDensity; ++i) { + /* Ignore obvious bogus extents. */ + if (extents[i].blockCount > (u_int32_t)maxblks) + continue; + if (extents[i].startBlock == 0 || extents[i].blockCount == 0) + break; + blocks += extents[i].blockCount; + } + return (blocks); +} + diff --git a/core/install b/core/install new file mode 100755 index 0000000..3975ef9 --- /dev/null +++ b/core/install @@ -0,0 +1,35 @@ +#!/bin/sh + +# install +# hfs +# +# Created by Chris Suter on 4/30/15. +# + +if [ ! "$MACHINE" ] ; then + echo "MACHINE not specified" + exit 1 +fi + +ROOT=hfs-root.tbz + +# Virtual machine stuff +[ "$VM" != "" -a "$VMRUN" != "" ] && { ping -c 1 -t 2 $MACHINE || { + VMX="$HOME/Documents/Virtual Machines.localized/$VM.vmwarevm/$VM.vmx" + + "$VMRUN" revertToSnapshot "$VMX" "Safe" || exit 1 + "$VMRUN" start "$VMX" || exit 1 + } +} + +rsync -P "$BUILT_PRODUCTS_DIR/$ROOT" $MACHINE: || exit 1 + +ssh $MACHINE bash -x -s </dev/null 2>/dev/null + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + KEXT + CFBundleShortVersionString + HFS_KEXT_VERSION + CFBundleSignature + ???? + CFBundleVersion + HFS_KEXT_VERSION + IOKitPersonalities + + com.apple.filesystems.hfs.kext + + CFBundleIdentifier + com.apple.filesystems.hfs.kext + IOClass + com_apple_filesystems_hfs + IOMatchCategory + com_apple_filesystems_hfs + IOProviderClass + IOResources + IOResourceMatch + IOBSD + + + NSHumanReadableCopyright + Copyright © 2015 Apple Inc. All rights reserved. + OSBundleLibraries + + com.apple.kpi.bsd + 14.1 + com.apple.kpi.iokit + 14.1 + com.apple.kpi.libkern + 14.1 + com.apple.kpi.mach + 14.1 + com.apple.kpi.private + 14.1 + com.apple.kpi.unsupported + 14.1 + + OSBundleRequired + Local-Root + + diff --git a/core/kext-config.h b/core/kext-config.h new file mode 100644 index 0000000..fa3b3c2 --- /dev/null +++ b/core/kext-config.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _hfs_config_ +#define _hfs_config_ + +#include + +#define HFS_COMPRESSION 1 +#define FIFO 1 + +// #define HFS_MALLOC_DEBUG 1 +// #define HFS_LEAK_DEBUG 1 + +#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR) // iOS (real hardware) + +#define QUOTA 0 +#define CONFIG_PROTECT 1 +#define CONFIG_SECLUDED_RENAME 1 + + +#else // OS X + +#define QUOTA 1 +#define NAMEDSTREAMS 1 +#define CONFIG_HFS_DIRLINK 1 +#define CONFIG_SEARCHFS 1 + +#endif + +#endif /* defined(_hfs_config_) */ diff --git a/core/kext.xcconfig b/core/kext.xcconfig new file mode 100644 index 0000000..56ab404 --- /dev/null +++ b/core/kext.xcconfig @@ -0,0 +1,52 @@ +// +// Copyright (c) 2015 Apple Inc. All rights reserved. +// +// @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +// +// This file contains Original Code and/or Modifications of Original Code +// as defined in and that are subject to the Apple Public Source License +// Version 2.0 (the 'License'). You may not use this file except in +// compliance with the License. The rights granted to you under the License +// may not be used to create, or enable the creation or redistribution of, +// unlawful or unlicensed copies of an Apple operating system, or to +// circumvent, violate, or enable the circumvention or violation of, any +// terms of an Apple operating system software license agreement. +// +// Please obtain a copy of the License at +// http://www.opensource.apple.com/apsl/ and read it before using this file. +// +// The Original Code and all software distributed under the License are +// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +// Please see the License for the specific language governing rights and +// limitations under the License. 
+// +// @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +// + +// Used if building combined xnu & hfs roots +//DEVICES= +//MACHINE= +//XNU_PATH= +//XNU_DST_PATH=$(XNU_PATH)/BUILD/dst +//KERNEL_PATH=$(XNU_DST_PATH)/System/Library/Kernels/kernel.development +//KERNEL_FRAMEWORK_PATH=$(XNU_DST_PATH)/System/Library/Frameworks/Kernel.framework +//HEADER_SEARCH_PATHS=$(KERNEL_FRAMEWORK_PATH)/PrivateHeaders $(KERNEL_FRAMEWORK_PATH)/Headers + +// Enable this to see if Clang has any new warnings +// WARNING_CFLAGS=-Weverything -Wno-unused-parameter -Wno-shorten-64-to-32 -Wno-reserved-id-macro -Wno-undef -Wno-missing-variable-declarations -Wno-padded -Wno-c11-extensions -Wno- documentation -Wno-variadic-macros -Wno-zero-length-array -Wno-documentation-unknown-command -Wno-packed -Wno-pedantic -Wno-format-non-iso -Wno-bad-function-cast -Wno-cast-align -Wno-disabled-macro-expansion -Wno-used-but-marked-unused -Wno-c++98-compat-pedantic -Wno-old-style-cast -Wno-c++98-compat -Wno-vla -Wno-switch-enum -Wno-c++-compat -Wno-global-constructors -Wno-shift-sign-overflow -Wno-covered-switch-default + +GCC_PREFIX_HEADER=core/kext-config.h +GCC_PREPROCESSOR_DEFINITIONS=$(PREPROC_DEFN_$(CONFIGURATION)) +PREPROC_DEFN_Debug=DEBUG +PRIVATE_HEADERS_FOLDER_PATH=/usr/local/include/hfs +PUBLIC_HEADERS_FOLDER_PATH=/usr/include/hfs +OTHER_CFLAGS=$(OTHER_CFLAGS_$(CONFIGURATION)) +OTHER_CFLAGS_Coverage=-fprofile-instr-generate -fcoverage-mapping + +BUILD_VARIANTS[sdk=macosx*] = normal kasan +BUILD_VARIANTS[sdk=iphoneos*] = normal kasan +CODE_SIGN_IDENTITY = - +OTHER_CFLAGS_kasan = $(KASAN_DEFAULT_CFLAGS) diff --git a/core/macosx-Info.plist b/core/macosx-Info.plist new file mode 100644 index 0000000..7d3fca3 --- /dev/null +++ b/core/macosx-Info.plist @@ -0,0 +1,61 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + KEXT + CFBundleShortVersionString + HFS_KEXT_VERSION + CFBundleSignature + ???? + CFBundleVersion + HFS_KEXT_VERSION + IOKitPersonalities + + com.apple.filesystems.hfs.kext + + CFBundleIdentifier + com.apple.filesystems.hfs.kext + IOClass + com_apple_filesystems_hfs + IOMatchCategory + com_apple_filesystems_hfs + IOProviderClass + IOResources + IOResourceMatch + IOBSD + + + NSHumanReadableCopyright + Copyright © 2015 Apple Inc. All rights reserved. + OSBundleLibraries + + com.apple.kpi.bsd + 14.1 + com.apple.kpi.iokit + 14.1 + com.apple.kpi.libkern + 14.1 + com.apple.kpi.mach + 14.1 + com.apple.kpi.private + 14.1 + com.apple.kpi.unsupported + 14.1 + com.apple.filesystems.hfs.encodings.kext + 1.0 + + OSBundleRequired + Local-Root + + diff --git a/core/mk-root.sh b/core/mk-root.sh new file mode 100755 index 0000000..fb62b25 --- /dev/null +++ b/core/mk-root.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# mk-root.sh +# hfs +# +# Created by Chris Suter on 5/3/15. +# + +shopt -s nocasematch + +set -e + +if [[ "$SDKROOT" =~ macosx ]] ; then + if [ ! "$KERNEL_PATH" ] ; then + KERNEL_PATH=$SDKROOT/System/Library/Kernels/kernel.development + fi + + EXTS_PATH="`dirname \"$KERNEL_PATH\"`"/../Extensions + + kextutil -no-load -t -k "$KERNEL_PATH" -no-authentication "$BUILT_PRODUCTS_DIR/HFSEncodings.kext" -d "$EXTS_PATH/System.kext" + kextutil -no-load -t -k "$KERNEL_PATH" -no-authentication "$BUILT_PRODUCTS_DIR/HFS.kext" -d "$EXTS_PATH/System.kext" -d "$BUILT_PRODUCTS_DIR/HFSEncodings.kext" + + if [ "$XNU_PATH" ] ; then + extra_args=(-C "$XNU_PATH/BUILD/dst" .) 
+ fi + gnutar --owner 0 --group 0 --transform 's|^([^/]+.kext)|System/Library/Extensions/\1|x' -C "$BUILT_PRODUCTS_DIR" HFS.kext HFSEncodings.kext "${extra_args[@]}" -cjf "$BUILT_PRODUCTS_DIR/hfs-root.tbz" + echo "Created $BUILT_PRODUCTS_DIR/hfs-root.tbz" + ln -sf $BUILT_PRODUCTS_DIR/hfs-root.tbz /tmp/ +else + ~/bin/copy-kernel-cache-builder + pushd /tmp/KernelCacheBuilder + if [ "$XNU_PATH" ] ; then + extra_args=(KERNEL_PATH="$XNU_DST_PATH") + extra_kext_paths="$BUILT_PRODUCTS_DIR $XNU_PATH/BUILD/dst/System/Library/Extensions" + else + extra_kext_paths="$BUILT_PRODUCTS_DIR" + fi + env -i make TARGETS="$DEVICES" "${extra_args[@]}" BUILDS=development VERBOSE=YES SDKROOT=iphoneos.internal EXTRA_KEXT_PATHS="$BUILT_PRODUCTS_DIR $XNU_PATH/BUILD/dst/System/Library/Extensions" EXTRA_BUNDLES=com.apple.filesystems.hfs.kext 2> >(sed -E '/^.*duplicate BUNDLE_IDS$/d' 1>&2) +fi diff --git a/core/rangelist.c b/core/rangelist.c new file mode 100644 index 0000000..a38ea83 --- /dev/null +++ b/core/rangelist.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2001-2015 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +#if !RANGELIST_TEST +#include +#include "hfs.h" +#endif + +#include "rangelist.h" + +static enum rl_overlaptype rl_scan_from(struct rl_head *rangelist, off_t start, off_t end, struct rl_entry **overlap, struct rl_entry *range); +static void rl_collapse_forwards(struct rl_head *rangelist, struct rl_entry *range); +static void rl_collapse_backwards(struct rl_head *rangelist, struct rl_entry *range); +static void rl_collapse_neighbors(struct rl_head *rangelist, struct rl_entry *range); + + +#ifdef RL_DIAGNOSTIC +static void +rl_verify(struct rl_head *rangelist) { + struct rl_entry *entry; + struct rl_entry *next; + off_t limit = 0; + + TAILQ_FOREACH_SAFE(rangelist, entry, rl_link, next) { + if ((limit > 0) && (entry->rl_start <= limit)) panic("hfs: rl_verify: bad entry start?!"); + if (entry->rl_end < entry->rl_start) panic("hfs: rl_verify: bad entry end?!"); + limit = entry->rl_end; + }; +} +#endif + + + +/* + * Initialize a range list head + */ +void +rl_init(struct rl_head *rangelist) +{ + TAILQ_INIT(rangelist); +} + +/* + * Add a range to the list + */ +void +rl_add(off_t start, off_t end, struct rl_head *rangelist) +{ + struct rl_entry *range; + struct rl_entry *overlap; + enum rl_overlaptype ovcase; + +#ifdef RL_DIAGNOSTIC + if (end < start) panic("hfs: rl_add: end < start?!"); +#endif + + ovcase = rl_scan(rangelist, start, end, &overlap); + + /* + * Six cases: + * 0) no overlap + * 1) overlap == range + * 2) overlap contains range + * 3) range contains overlap + * 4) overlap starts before range + * 5) overlap ends after range + */ + switch (ovcase) { + case RL_NOOVERLAP: /* 0: no overlap */ + /* + * overlap points to the entry we should insert before, or + * if NULL, we should insert at the end. + */ + range = hfs_malloc(sizeof(*range)); + range->rl_start = start; + range->rl_end = end; + + /* Link in the new range: */ + if (overlap) { + TAILQ_INSERT_BEFORE(overlap, range, rl_link); + } else { + TAILQ_INSERT_TAIL(rangelist, range, rl_link); + } + + /* Check to see if any ranges can be combined (possibly including the immediately + preceding range entry) + */ + rl_collapse_neighbors(rangelist, range); + break; + + case RL_MATCHINGOVERLAP: /* 1: overlap == range */ + case RL_OVERLAPCONTAINSRANGE: /* 2: overlap contains range */ + break; + + case RL_OVERLAPISCONTAINED: /* 3: range contains overlap */ + /* + * Replace the overlap with the new, larger range: + */ + overlap->rl_start = start; + overlap->rl_end = end; + rl_collapse_neighbors(rangelist, overlap); + break; + + case RL_OVERLAPSTARTSBEFORE: /* 4: overlap starts before range */ + /* + * Expand the overlap area to cover the new range: + */ + overlap->rl_end = end; + rl_collapse_forwards(rangelist, overlap); + break; + + case RL_OVERLAPENDSAFTER: /* 5: overlap ends after range */ + /* + * Expand the overlap area to cover the new range: + */ + overlap->rl_start = start; + rl_collapse_backwards(rangelist, overlap); + break; + } + +#ifdef RL_DIAGNOSTIC + rl_verify(rangelist); +#endif +} + + + +/* + * Remove a range from a range list. + * + * Generally, find the range (or an overlap to that range) + * and remove it (or shrink it), then wakeup anyone we can. 
+ */ +void +rl_remove(off_t start, off_t end, struct rl_head *rangelist) +{ + struct rl_entry *range, *next_range, *overlap, *splitrange; + int ovcase; + +#ifdef RL_DIAGNOSTIC + if (end < start) panic("hfs: rl_remove: end < start?!"); +#endif + + if (TAILQ_EMPTY(rangelist)) { + return; + }; + + range = TAILQ_FIRST(rangelist); + while ((ovcase = rl_scan_from(rangelist, start, end, &overlap, range))) { + switch (ovcase) { + + case RL_MATCHINGOVERLAP: /* 1: overlap == range */ + TAILQ_REMOVE(rangelist, overlap, rl_link); + hfs_free(overlap, sizeof(*overlap)); + break; + + case RL_OVERLAPCONTAINSRANGE: /* 2: overlap contains range: split it */ + if (overlap->rl_start == start) { + overlap->rl_start = end + 1; + break; + }; + + if (overlap->rl_end == end) { + overlap->rl_end = start - 1; + break; + }; + + /* + * Make a new range consisting of the last part of the encompassing range + */ + splitrange = hfs_malloc(sizeof *splitrange); + splitrange->rl_start = end + 1; + splitrange->rl_end = overlap->rl_end; + overlap->rl_end = start - 1; + + /* + * Now link the new entry into the range list after the range from which it was split: + */ + TAILQ_INSERT_AFTER(rangelist, overlap, splitrange, rl_link); + break; + + case RL_OVERLAPISCONTAINED: /* 3: range contains overlap */ + /* Check before discarding overlap entry */ + next_range = TAILQ_NEXT(overlap, rl_link); + TAILQ_REMOVE(rangelist, overlap, rl_link); + hfs_free(overlap, sizeof(*overlap)); + if (next_range) { + range = next_range; + continue; + }; + break; + + case RL_OVERLAPSTARTSBEFORE: /* 4: overlap starts before range */ + overlap->rl_end = start - 1; + range = TAILQ_NEXT(overlap, rl_link); + if (range) { + continue; + } + break; + + case RL_OVERLAPENDSAFTER: /* 5: overlap ends after range */ + overlap->rl_start = (end == RL_INFINITY ? RL_INFINITY : end + 1); + break; + } + break; + } + +#ifdef RL_DIAGNOSTIC + rl_verify(rangelist); +#endif +} + + + +/* + * Scan a range list for an entry in a specified range (if any): + * + * NOTE: this returns only the FIRST overlapping range. + * There may be more than one. 
+ */ + +enum rl_overlaptype +rl_scan(struct rl_head *rangelist, + off_t start, + off_t end, + struct rl_entry **overlap) { + + return rl_scan_from(rangelist, start, end, overlap, TAILQ_FIRST(rangelist)); +} + +enum rl_overlaptype +rl_overlap(const struct rl_entry *range, off_t start, off_t end) +{ + /* + * OK, check for overlap + * + * Six cases: + * 0) no overlap (RL_NOOVERLAP) + * 1) overlap == range (RL_MATCHINGOVERLAP) + * 2) overlap contains range (RL_OVERLAPCONTAINSRANGE) + * 3) range contains overlap (RL_OVERLAPISCONTAINED) + * 4) overlap starts before range (RL_OVERLAPSTARTSBEFORE) + * 5) overlap ends after range (RL_OVERLAPENDSAFTER) + */ + if (start > range->rl_end || range->rl_start > end) { + /* Case 0 (RL_NOOVERLAP) */ + return RL_NOOVERLAP; + } + + if (range->rl_start == start && range->rl_end == end) { + /* Case 1 (RL_MATCHINGOVERLAP) */ + return RL_MATCHINGOVERLAP; + } + + if (range->rl_start <= start && range->rl_end >= end) { + /* Case 2 (RL_OVERLAPCONTAINSRANGE) */ + return RL_OVERLAPCONTAINSRANGE; + } + + if (start <= range->rl_start && end >= range->rl_end) { + /* Case 3 (RL_OVERLAPISCONTAINED) */ + return RL_OVERLAPISCONTAINED; + } + + if (range->rl_start < start && range->rl_end < end) { + /* Case 4 (RL_OVERLAPSTARTSBEFORE) */ + return RL_OVERLAPSTARTSBEFORE; + } + + /* Case 5 (RL_OVERLAPENDSAFTER) */ + // range->rl_start > start && range->rl_end > end + return RL_OVERLAPENDSAFTER; +} + +/* + * Walk the list of ranges for an entry to + * find an overlapping range (if any). + * + * NOTE: this returns only the FIRST overlapping range. + * There may be more than one. + */ +static enum rl_overlaptype +rl_scan_from(struct rl_head *rangelist __unused, + off_t start, + off_t end, + struct rl_entry **overlap, + struct rl_entry *range) +{ +#ifdef RL_DIAGNOSTIC + rl_verify(rangelist); +#endif + + while (range) { + enum rl_overlaptype ot = rl_overlap(range, start, end); + + if (ot != RL_NOOVERLAP || range->rl_start > end) { + *overlap = range; + return ot; + } + + range = TAILQ_NEXT(range, rl_link); + } + + *overlap = NULL; + return RL_NOOVERLAP; +} + + +static void +rl_collapse_forwards(struct rl_head *rangelist, struct rl_entry *range) { + struct rl_entry *next_range; + + while ((next_range = TAILQ_NEXT(range, rl_link))) { + if ((range->rl_end != RL_INFINITY) && (range->rl_end < next_range->rl_start - 1)) return; + + /* Expand this range to include the next range: */ + range->rl_end = next_range->rl_end; + + /* Remove the now covered range from the list: */ + TAILQ_REMOVE(rangelist, next_range, rl_link); + hfs_free(next_range, sizeof(*next_range)); + +#ifdef RL_DIAGNOSTIC + rl_verify(rangelist); +#endif + }; +} + + + +static void +rl_collapse_backwards(struct rl_head *rangelist, struct rl_entry *range) { + struct rl_entry *prev_range; + + while ((prev_range = TAILQ_PREV(range, rl_head, rl_link))) { + if (prev_range->rl_end < range->rl_start -1) { +#ifdef RL_DIAGNOSTIC + rl_verify(rangelist); +#endif + return; + }; + + /* Expand this range to include the previous range: */ + range->rl_start = prev_range->rl_start; + + /* Remove the now covered range from the list: */ + TAILQ_REMOVE(rangelist, prev_range, rl_link); + hfs_free(prev_range, sizeof(*prev_range)); + }; +} + + + +static void +rl_collapse_neighbors(struct rl_head *rangelist, struct rl_entry *range) +{ + rl_collapse_forwards(rangelist, range); + rl_collapse_backwards(rangelist, range); +} + +void rl_remove_all(struct rl_head *rangelist) +{ + struct rl_entry *r, *nextr; + TAILQ_FOREACH_SAFE(r, rangelist, rl_link, nextr) 
+	hfs_free(r, sizeof(*r));
+	TAILQ_INIT(rangelist);
+}
+
+/*
+ * In the case where b is contained by a, we return the largest part
+ * remaining. The result is stored in a.
+ */
+void rl_subtract(struct rl_entry *a, const struct rl_entry *b)
+{
+	switch (rl_overlap(b, a->rl_start, a->rl_end)) {
+	case RL_MATCHINGOVERLAP:
+	case RL_OVERLAPCONTAINSRANGE:
+		a->rl_end = a->rl_start - 1;
+		break;
+	case RL_OVERLAPISCONTAINED:
+		// Keep the bigger part
+		if (b->rl_start - a->rl_start >= a->rl_end - b->rl_end) {
+			// Keep left
+			a->rl_end = b->rl_start - 1;
+		} else {
+			// Keep right
+			a->rl_start = b->rl_end + 1;
+		}
+		break;
+	case RL_OVERLAPSTARTSBEFORE:
+		a->rl_start = b->rl_end + 1;
+		break;
+	case RL_OVERLAPENDSAFTER:
+		a->rl_end = b->rl_start - 1;
+		break;
+	case RL_NOOVERLAP:
+		break;
+	}
+}
diff --git a/core/rangelist.h b/core/rangelist.h
new file mode 100644
index 0000000..b0bb7d1
--- /dev/null
+++ b/core/rangelist.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2001-2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _HFS_RANGELIST_H_ +#define _HFS_RANGELIST_H_ + +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include +#include + +enum rl_overlaptype { + RL_NOOVERLAP = 0, /* 0 */ + RL_MATCHINGOVERLAP, /* 1 */ + RL_OVERLAPCONTAINSRANGE, /* 2 */ + RL_OVERLAPISCONTAINED, /* 3 */ + RL_OVERLAPSTARTSBEFORE, /* 4 */ + RL_OVERLAPENDSAFTER /* 5 */ +}; + +#define RL_INFINITY INT64_MAX + +TAILQ_HEAD(rl_head, rl_entry); + +struct rl_entry { + TAILQ_ENTRY(rl_entry) rl_link; + off_t rl_start; + off_t rl_end; +}; + +__BEGIN_DECLS +void rl_init(struct rl_head *rangelist); +void rl_add(off_t start, off_t end, struct rl_head *rangelist); +void rl_remove(off_t start, off_t end, struct rl_head *rangelist); +void rl_remove_all(struct rl_head *rangelist); +enum rl_overlaptype rl_scan(struct rl_head *rangelist, + off_t start, + off_t end, + struct rl_entry **overlap); +enum rl_overlaptype rl_overlap(const struct rl_entry *range, + off_t start, off_t end); + +static __attribute__((pure)) inline +off_t rl_len(const struct rl_entry *range) +{ + return range->rl_end - range->rl_start + 1; +} + +void rl_subtract(struct rl_entry *a, const struct rl_entry *b); + +static inline struct rl_entry rl_make(off_t start, off_t end) +{ + return (struct rl_entry){ .rl_start = start, .rl_end = end }; +} + +__END_DECLS + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* ! _HFS_RANGELIST_H_ */ diff --git a/hfs.xcodeproj/project.pbxproj b/hfs.xcodeproj/project.pbxproj index a6b84e0..ace6a65 100644 --- a/hfs.xcodeproj/project.pbxproj +++ b/hfs.xcodeproj/project.pbxproj @@ -344,6 +344,7 @@ D7978426205FC09A00E93B37 /* lf_hfs_endian.h in Headers */ = {isa = PBXBuildFile; fileRef = D7978424205FC09A00E93B37 /* lf_hfs_endian.h */; }; D79784412060037400E93B37 /* lf_hfs_raw_read_write.h in Headers */ = {isa = PBXBuildFile; fileRef = D797843F2060037400E93B37 /* lf_hfs_raw_read_write.h */; }; D79784422060037400E93B37 /* lf_hfs_raw_read_write.c in Sources */ = {isa = PBXBuildFile; fileRef = D79784402060037400E93B37 /* lf_hfs_raw_read_write.c */; }; + D7B2DC81233A3F5B00F12230 /* livefiles_hfs.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 900BDED41FF919C2002F7EC0 /* livefiles_hfs.dylib */; }; D7BD8F9C20AC388E00E93640 /* lf_hfs_catalog.c in Sources */ = {isa = PBXBuildFile; fileRef = 906EBF82206409B800B21E94 /* lf_hfs_catalog.c */; }; EE73740520644328004C2F0E /* lf_hfs_sbunicode.h in Headers */ = {isa = PBXBuildFile; fileRef = EE73740320644328004C2F0E /* lf_hfs_sbunicode.h */; }; EE73740620644328004C2F0E /* lf_hfs_sbunicode.c in Sources */ = {isa = PBXBuildFile; fileRef = EE73740420644328004C2F0E /* lf_hfs_sbunicode.c */; }; @@ -1284,7 +1285,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - 9022D171205EC18500D9A2AE /* livefiles_hfs.dylib in Frameworks */, + D7B2DC81233A3F5B00F12230 /* livefiles_hfs.dylib in Frameworks */, 9022D170205EC16900D9A2AE /* CoreFoundation.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/hfs.xcodeproj/xcshareddata/xcschemes/livefiles_hfs_tester.xcscheme b/hfs.xcodeproj/xcshareddata/xcschemes/livefiles_hfs_tester.xcscheme index 965e177..8413325 100644 --- a/hfs.xcodeproj/xcshareddata/xcschemes/livefiles_hfs_tester.xcscheme +++ b/hfs.xcodeproj/xcshareddata/xcschemes/livefiles_hfs_tester.xcscheme @@ -27,8 +27,6 @@ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = 
"YES"> - - - - + + - - buffer = nil; + nodePtr->buffer = nil; nodePtr->blockHeader = nil; return err; diff --git a/livefiles_hfs_plugin/lf_hfs_btree_tree_ops.c b/livefiles_hfs_plugin/lf_hfs_btree_tree_ops.c index ecbe3fe..c3accc3 100644 --- a/livefiles_hfs_plugin/lf_hfs_btree_tree_ops.c +++ b/livefiles_hfs_plugin/lf_hfs_btree_tree_ops.c @@ -251,7 +251,7 @@ ReleaseAndExit: ErrorExit: *nodeNum = 0; - nodePtr->buffer = nil; + nodePtr->buffer = nil; nodePtr->blockHeader = nil; *returnIndex = 0; return err; diff --git a/livefiles_hfs_plugin/lf_hfs_chash.c b/livefiles_hfs_plugin/lf_hfs_chash.c index 70d99ca..60f4fd5 100644 --- a/livefiles_hfs_plugin/lf_hfs_chash.c +++ b/livefiles_hfs_plugin/lf_hfs_chash.c @@ -181,7 +181,7 @@ loop_with_lock: } } vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; - + /* * Skip cnodes that are not in the name space anymore * we need to check with the cnode lock held because @@ -193,9 +193,10 @@ loop_with_lock: * is no longer valid for lookups. */ if (((cp->c_flag & (C_NOEXISTS | C_DELETED)) && !wantrsrc) || - (cp->uOpenLookupRefCount == 0) || + ((vp != NULL) && + ((cp->uOpenLookupRefCount == 0) || (vp->uValidNodeMagic1 == VALID_NODE_BADMAGIC) || - (vp->uValidNodeMagic2 == VALID_NODE_BADMAGIC)) + (vp->uValidNodeMagic2 == VALID_NODE_BADMAGIC)))) { int renamed = 0; if (cp->c_flag & C_RENAMED) diff --git a/livefiles_hfs_plugin/lf_hfs_cnode.c b/livefiles_hfs_plugin/lf_hfs_cnode.c index be2582d..2e15868 100644 --- a/livefiles_hfs_plugin/lf_hfs_cnode.c +++ b/livefiles_hfs_plugin/lf_hfs_cnode.c @@ -465,36 +465,6 @@ hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname } } } -#if LF_HFS_FULL_VNODE_SUPPORT - if (tvp != NULL) - { - /* - * grab an iocount on the vnode we weren't - * interested in (i.e. we want the resource fork - * but the cnode already has the data fork) - * to prevent it from being - * recycled by us when we call vnode_create - * which will result in a deadlock when we - * try to take the cnode lock in hfs_vnop_fsync or - * hfs_vnop_reclaim... vnode_get can be called here - * because we already hold the cnode lock which will - * prevent the vnode from changing identity until - * we drop it.. vnode_get will not block waiting for - * a change of state... however, it will return an - * error if the current iocount == 0 and we've already - * started to terminate the vnode... we don't need/want to - * grab an iocount in the case since we can't cause - * the fileystem to be re-entered on this thread for this vp - * - * the matching vnode_put will happen in hfs_unlock - * after we've dropped the cnode lock - */ - if ( vnode_get(tvp) != 0) - { - cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT); - } - } -#endif vfsp.vnfs_mp = mp; vfsp.vnfs_vtype = vtype; @@ -671,7 +641,7 @@ hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname */ if (vp && VNODE_IS_RSRC(vp)) { - vnode_rele(vp); + vp->is_rsrc = true; } hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); @@ -937,32 +907,6 @@ hfs_unlock(struct cnode *cp) cp->c_lockowner = NULL; lf_lck_rw_unlock_shared(&cp->c_rwlock); } - -#if LF_HFS_FULL_VNODE_SUPPORT - /* Perform any vnode post processing after cnode lock is dropped. 
*/ - if (vp) - { - if (c_flag & C_NEED_DATA_SETSIZE) - { - ubc_setsize(vp, VTOF(vp)->ff_size); - } - if (c_flag & C_NEED_DVNODE_PUT) - { - vnode_put(vp); - } - } - if (rvp) - { - if (c_flag & C_NEED_RSRC_SETSIZE) - { - ubc_setsize(rvp, VTOF(rvp)->ff_size); - } - if (c_flag & C_NEED_RVNODE_PUT) - { - vnode_put(rvp); - } - } -#endif } /* @@ -1900,47 +1844,13 @@ out: return error; } - -/* - * Reclaim a cnode so that it can be used for other purposes. - */ int -hfs_vnop_reclaim(struct vnode *vp) +hfs_fork_release(struct cnode* cp, struct vnode *vp, bool bIsRsc, int* piErr) { - struct cnode* cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); struct filefork *fp = NULL; struct filefork *altfp = NULL; - struct hfsmount *hfsmp = VTOHFS(vp); int reclaim_cnode = 0; - int err = 0; - - /* - * We don't take the truncate lock since by the time reclaim comes along, - * all dirty pages have been synced and nobody should be competing - * with us for this thread. - */ - hfs_chash_mark_in_transit(hfsmp, cp); - - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - lf_hfs_generic_buf_cache_LockBufCache(); - - //In case we have other open lookups - //We need to decrease the counter and exit - if (cp->uOpenLookupRefCount > 1) - { - hfs_chash_lower_OpenLookupCounter(cp); - hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); - lf_hfs_generic_buf_cache_UnLockBufCache(); - hfs_unlock(cp); - return err; - } - - if (cp->uOpenLookupRefCount == 0) assert(0); - - hfs_chash_lower_OpenLookupCounter(cp); - lf_hfs_generic_buf_cache_remove_vnode(vp); - - lf_hfs_generic_buf_cache_UnLockBufCache(); /* * Sync to disk any remaining data in the cnode/vnode. This includes @@ -1950,12 +1860,12 @@ hfs_vnop_reclaim(struct vnode *vp) * because the catalog entry for this cnode is already gone. */ INVALIDATE_NODE(vp); - + if (!ISSET(cp->c_flag, C_NOEXISTS)) { - err = hfs_cnode_teardown(vp, 1); - if (err) + *piErr = hfs_cnode_teardown(vp, 1); + if (*piErr) { - return err; + return 0; } } @@ -1966,28 +1876,21 @@ hfs_vnop_reclaim(struct vnode *vp) hfs_free(vp->sFSParams.vnfs_cnp); } - - /* - * Find file fork for this vnode (if any) - * Also check if another fork is active - */ - if (cp->c_vp == vp) { + + if (!bIsRsc) { fp = cp->c_datafork; altfp = cp->c_rsrcfork; - + cp->c_datafork = NULL; cp->c_vp = NULL; - } else if (cp->c_rsrc_vp == vp) { + } else { fp = cp->c_rsrcfork; altfp = cp->c_datafork; - + cp->c_rsrcfork = NULL; cp->c_rsrc_vp = NULL; - } else { - LFHFS_LOG(LEVEL_ERROR, "hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp); - hfs_assert(0); } - + /* * On the last fork, remove the cnode from its hash chain. */ @@ -2001,11 +1904,12 @@ hfs_vnop_reclaim(struct vnode *vp) if (vnode_isdir(vp)) { hfs_reldirhints(cp, 0); } - + if(cp->c_flag & C_HARDLINK) { hfs_relorigins(cp); } } + /* Release the file fork and related data */ if (fp) { @@ -2016,6 +1920,80 @@ hfs_vnop_reclaim(struct vnode *vp) rl_remove_all(&fp->ff_invalidranges); hfs_free(fp); } + + return reclaim_cnode; +} + + +/* + * Reclaim a cnode so that it can be used for other purposes. + */ +int +hfs_vnop_reclaim(struct vnode *vp) +{ + struct cnode* cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + struct vnode *altvp = NULL; + int reclaim_cnode = 0; + int err = 0; + + /* + * We don't take the truncate lock since by the time reclaim comes along, + * all dirty pages have been synced and nobody should be competing + * with us for this thread. 
+ */ + hfs_chash_mark_in_transit(hfsmp, cp); + + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + lf_hfs_generic_buf_cache_LockBufCache(); + + //In case we have other open lookups + //We need to decrease the counter and exit + if (cp->uOpenLookupRefCount > 1) + { + hfs_chash_lower_OpenLookupCounter(cp); + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); + lf_hfs_generic_buf_cache_UnLockBufCache(); + hfs_unlock(cp); + return err; + } + + if (cp->uOpenLookupRefCount == 0) assert(0); + + hfs_chash_lower_OpenLookupCounter(cp); + lf_hfs_generic_buf_cache_remove_vnode(vp); + + lf_hfs_generic_buf_cache_UnLockBufCache(); + + /* + * Find file fork for this vnode (if any) + * Also check if another fork is active + */ + if (cp->c_vp == vp) { + + reclaim_cnode = hfs_fork_release(cp, vp, false, &err); + if (err) return err; + + if (!reclaim_cnode && cp->c_rsrc_vp != NULL) + { + altvp = cp->c_rsrc_vp; + reclaim_cnode = hfs_fork_release(cp, altvp, true, &err); + if (err) return err; + } + } else if (cp->c_rsrc_vp == vp) { + reclaim_cnode = hfs_fork_release(cp, vp, true, &err); + if (err) return err; + + if (!reclaim_cnode && cp->c_vp != NULL) + { + altvp = cp->c_vp; + reclaim_cnode = hfs_fork_release(cp, altvp, false, &err); + if (err) return err; + } + } else { + LFHFS_LOG(LEVEL_ERROR, "hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp); + hfs_assert(0); + } /* * If there was only one active fork then we can release the cnode. @@ -2035,6 +2013,9 @@ hfs_vnop_reclaim(struct vnode *vp) } hfs_free(vp); + if (altvp) + hfs_free(altvp); + vp = NULL; return (0); } diff --git a/livefiles_hfs_plugin/lf_hfs_common.h b/livefiles_hfs_plugin/lf_hfs_common.h index 2150b09..ee9fb95 100644 --- a/livefiles_hfs_plugin/lf_hfs_common.h +++ b/livefiles_hfs_plugin/lf_hfs_common.h @@ -35,13 +35,14 @@ typedef struct { - int iFD; // File descriptor as received from usbstoraged - + int iFD; // File descriptor as received from usbstoraged + unsigned uUnmountHint; // Unmount hint (passed on in LFHFS_UNMOUNT, cleared on LFHFS_MOUNT) } FileSystemRecord_s; #define VPTOFSRECORD(vp) (vp->sFSParams.vnfs_mp->psHfsmount->hfs_devvp->psFSRecord) -#define VNODE_TO_IFD(vp) ((vp->bIsMountVnode)? (vp->psFSRecord->iFD) : ((VPTOFSRECORD(vp))->iFD)) +#define VNODE_TO_IFD(vp) ((vp->bIsMountVnode)? (vp->psFSRecord->iFD) : ((VPTOFSRECORD(vp))->iFD)) +#define VNODE_TO_UNMOUNT_HINT(vp) ((vp->bIsMountVnode)? (vp->psFSRecord->uUnmountHint) : ((VPTOFSRECORD(vp))->uUnmountHint)) /* Macros to clear/set/test flags. 
*/ #define SET(t, f) (t) |= (f) diff --git a/livefiles_hfs_plugin/lf_hfs_dirops_handler.c b/livefiles_hfs_plugin/lf_hfs_dirops_handler.c index befe576..e13ec2a 100644 --- a/livefiles_hfs_plugin/lf_hfs_dirops_handler.c +++ b/livefiles_hfs_plugin/lf_hfs_dirops_handler.c @@ -201,7 +201,7 @@ exit: } int -LFHFS_Remove ( UVFSFileNode psDirNode, const char *pcUTF8Name, __unused UVFSFileNode victimNode) +LFHFS_Remove ( UVFSFileNode psDirNode, const char *pcUTF8Name, __unused UVFSFileNode victimNode) { LFHFS_LOG(LEVEL_DEBUG, "LFHFS_Remove\n"); VERIFY_NODE_IS_VALID(psDirNode); diff --git a/livefiles_hfs_plugin/lf_hfs_endian.c b/livefiles_hfs_plugin/lf_hfs_endian.c index 9e4a7d0..9ed6591 100644 --- a/livefiles_hfs_plugin/lf_hfs_endian.c +++ b/livefiles_hfs_plugin/lf_hfs_endian.c @@ -228,10 +228,7 @@ hfs_swap_BTNode ( if ((srcDesc->kind == kBTIndexNode) || (srcDesc->kind == kBTLeafNode)) { - if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { - error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction); - } - + error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction); if (error) goto fail; } else if (srcDesc-> kind == kBTMapNode) { diff --git a/livefiles_hfs_plugin/lf_hfs_file_extent_mapping.c b/livefiles_hfs_plugin/lf_hfs_file_extent_mapping.c index 2a680f8..14c58c4 100644 --- a/livefiles_hfs_plugin/lf_hfs_file_extent_mapping.c +++ b/livefiles_hfs_plugin/lf_hfs_file_extent_mapping.c @@ -446,7 +446,7 @@ static OSErr TruncateExtents( Boolean releasedLastExtent; u_int32_t hint; HFSPlusExtentKey key; - HFSPlusExtentRecord extents = {0}; + HFSPlusExtentRecord extents = {{0}}; int lockflags; /* @@ -1397,6 +1397,7 @@ static OSErr SearchExtentRecord( u_int32_t *endingFABNPlusOne, Boolean *noMoreExtents) { +#pragma unused (vcb) OSErr err = noErr; u_int32_t extentIndex; /* Set it to the HFS std value */ diff --git a/livefiles_hfs_plugin/lf_hfs_fileops_handler.c b/livefiles_hfs_plugin/lf_hfs_fileops_handler.c index 44aee23..edeae7e 100644 --- a/livefiles_hfs_plugin/lf_hfs_fileops_handler.c +++ b/livefiles_hfs_plugin/lf_hfs_fileops_handler.c @@ -654,3 +654,105 @@ int LFHFS_ListXAttr ( UVFSFileNode psNode, void *pvOutBuf, size_t iBufSize, size return iErr; } + +int +LFHFS_StreamLookup ( UVFSFileNode psFileNode, UVFSStreamNode *ppsOutNode ) +{ + LFHFS_LOG(LEVEL_DEBUG, "LFHFS_StreamLookup\n"); + VERIFY_NODE_IS_VALID(psFileNode); + + vnode_t psVnode = (vnode_t)psFileNode; + vnode_t psRscVnode = NULL; + + if (IS_DIR(psVnode)) { + return EISDIR; + } + + int iError = hfs_vgetrsrc(psVnode, &psRscVnode); + + if (!iError) + hfs_unlock (VTOC(psRscVnode)); + + *ppsOutNode = (UVFSStreamNode) psRscVnode; + + return iError; +} + +int +LFHFS_StreamReclaim (UVFSStreamNode psStreamNode ) +{ + LFHFS_LOG(LEVEL_DEBUG, "LFHFS_StreamReclaim\n"); + + int iError = 0; + vnode_t psVnode = (vnode_t) psStreamNode; + + if ( psVnode != NULL ) + { + VERIFY_NODE_IS_VALID_FOR_RECLAIM(psVnode); + + iError = hfs_vnop_reclaim(psVnode); + psVnode = NULL; + } + + return iError; +} + +int +LFHFS_StreamRead (UVFSStreamNode psStreamNode, uint64_t uOffset, size_t iLength, void *pvBuf, size_t *iActuallyRead ) +{ + LFHFS_LOG(LEVEL_DEBUG, "LFHFS_StreamRead (psNode %p, uOffset %llu, iLength %lu)\n", psStreamNode, uOffset, iLength); + VERIFY_NODE_IS_VALID(psStreamNode); + + struct vnode *vp = (vnode_t)psStreamNode; + struct cnode *cp; + struct filefork *fp; + uint64_t filesize; + int retval = 0; + int took_truncate_lock = 0; + *iActuallyRead = 0; + + /* Preflight checks */ + if (!vnode_isreg(vp)) { + /* can only read regular 
files */ + return ( vnode_isdir(vp) ? EISDIR : EPERM ); + } + + cp = VTOC(vp); + fp = VTOF(vp); + + /* Protect against a size change. */ + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); + took_truncate_lock = 1; + + filesize = fp->ff_size; + /* + * Check the file size. Note that per POSIX spec, we return 0 at + * file EOF, so attempting a read at an offset that is too big + * should just return 0 on HFS+. Since the return value was initialized + * to 0 above, we just jump to exit. HFS Standard has its own behavior. + */ + if (uOffset > filesize) + { + LFHFS_LOG( LEVEL_ERROR, "LFHFS_Read: wanted offset is greater then file size\n" ); + goto exit; + } + + // If we asked to read above the file size, adjust the read size; + if ( uOffset + iLength > filesize ) + { + iLength = filesize - uOffset; + } + + uint64_t uReadStartCluster; + retval = raw_readwrite_read( vp, uOffset, pvBuf, iLength, iActuallyRead, &uReadStartCluster ); + + cp->c_touch_acctime = TRUE; + +exit: + if (took_truncate_lock) + { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + } + return retval; +} + diff --git a/livefiles_hfs_plugin/lf_hfs_fileops_handler.h b/livefiles_hfs_plugin/lf_hfs_fileops_handler.h index bf709a7..dedd70e 100644 --- a/livefiles_hfs_plugin/lf_hfs_fileops_handler.h +++ b/livefiles_hfs_plugin/lf_hfs_fileops_handler.h @@ -63,4 +63,7 @@ int LFHFS_GetXAttr ( UVFSFileNode psNode, const char *pcAttr, void *pvOutBuf, int LFHFS_SetXAttr ( UVFSFileNode psNode, const char *pcAttr, const void *pvInBuf, size_t iBufSize, UVFSXattrHow How ); int LFHFS_ListXAttr ( UVFSFileNode psNode, void *pvOutBuf, size_t iBufSize, size_t *iActualSize ); +int LFHFS_StreamLookup ( UVFSFileNode psFileNode, UVFSStreamNode *ppsOutNode ); +int LFHFS_StreamReclaim (UVFSStreamNode psStreamNode ); +int LFHFS_StreamRead (UVFSStreamNode psStreamNode, uint64_t uOffset, size_t iLength, void *pvBuf, size_t *iActuallyRead ); #endif /* lf_hfs_fileops_handler_h */ diff --git a/livefiles_hfs_plugin/lf_hfs_fsops_handler.c b/livefiles_hfs_plugin/lf_hfs_fsops_handler.c index 40acd59..b4bb873 100644 --- a/livefiles_hfs_plugin/lf_hfs_fsops_handler.c +++ b/livefiles_hfs_plugin/lf_hfs_fsops_handler.c @@ -22,6 +22,9 @@ #include "lf_hfs_journal.h" #include "lf_hfs_vfsops.h" #include "lf_hfs_mount.h" +#include "lf_hfs_readwrite_ops.h" + +#include "lf_hfs_vnops.h" static int FSOPS_GetRootVnode(struct vnode* psDevVnode, struct vnode** ppsRootVnode) @@ -261,14 +264,15 @@ LFHFS_Unmount ( UVFSFileNode psRootNode, UVFSUnmountHint hint ) { VERIFY_NODE_IS_VALID(psRootNode); LFHFS_LOG(LEVEL_DEBUG, "HFS_Unmount (psRootNode %p) (hint %u)\n", psRootNode, hint); - + int iError = 0; struct vnode *psRootVnode = (struct vnode*) psRootNode; FileSystemRecord_s *psFSRecord = VPTOFSRECORD(psRootVnode); struct mount *psMount = psRootVnode->sFSParams.vnfs_mp; struct cnode *psDevCnode = VTOHFS(psRootVnode)->hfs_devvp->sFSParams.vnfs_fsnode; struct hfsmount *psHfsMp = psMount->psHfsmount; - + psFSRecord->uUnmountHint = hint; + #if HFS_CRASH_TEST CRASH_ABORT(CRASH_ABORT_ON_UNMOUNT, psHfsMp, NULL); #endif @@ -290,11 +294,24 @@ LFHFS_Unmount ( UVFSFileNode psRootNode, UVFSUnmountHint hint ) } int -LFHFS_SetFSAttr ( UVFSFileNode psNode, const char *pcAttr, const UVFSFSAttributeValue *psAttrVal, size_t uLen ) +LFHFS_SetFSAttr ( UVFSFileNode psNode, const char *pcAttr, const UVFSFSAttributeValue *psAttrVal, size_t uLen, UVFSFSAttributeValue *psOutAttrVal, size_t uOutLen ) { #pragma unused (psNode, pcAttr, psAttrVal, uLen) VERIFY_NODE_IS_VALID(psNode); - LFHFS_LOG(LEVEL_DEBUG, 
"LFHFS_SetFSAttr (ENOTSUP)\n"); + + if (pcAttr == NULL || psAttrVal == NULL || psOutAttrVal == NULL) return EINVAL; + + if (strcmp(pcAttr, LI_FSATTR_PREALLOCATE) == 0) + { + if (uLen < sizeof (UVFSFSAttributeValue) || uOutLen < sizeof (UVFSFSAttributeValue)) + return EINVAL; + + LIFilePreallocateArgs_t* psPreAllocReq = (LIFilePreallocateArgs_t *) ((void *) psAttrVal->fsa_opaque); + LIFilePreallocateArgs_t* psPreAllocRes = (LIFilePreallocateArgs_t *) ((void *) psOutAttrVal->fsa_opaque); + + memcpy (psPreAllocRes, psPreAllocReq, sizeof(LIFilePreallocateArgs_t)); + return hfs_vnop_preallocate(psNode, psPreAllocReq, psPreAllocRes); + } return ENOTSUP; } @@ -690,7 +707,11 @@ UVFSFSOps HFS_fsOps = { .fsops_listxattr = LFHFS_ListXAttr, .fsops_scandir = LFHFS_ScanDir, - .fsops_scanids = LFHFS_ScanIDs + .fsops_scanids = LFHFS_ScanIDs, + + .fsops_stream_lookup = LFHFS_StreamLookup, + .fsops_stream_reclaim = LFHFS_StreamReclaim, + .fsops_stream_read = LFHFS_StreamRead, }; #if HFS_CRASH_TEST diff --git a/livefiles_hfs_plugin/lf_hfs_raw_read_write.c b/livefiles_hfs_plugin/lf_hfs_raw_read_write.c index f3ab6fd..f4c93f2 100644 --- a/livefiles_hfs_plugin/lf_hfs_raw_read_write.c +++ b/livefiles_hfs_plugin/lf_hfs_raw_read_write.c @@ -12,6 +12,7 @@ #include "lf_hfs_file_mgr_internal.h" #include "lf_hfs_file_extent_mapping.h" #include "lf_hfs_vfsutils.h" +#include #define MAX_READ_WRITE_LENGTH (0x7ffff000) @@ -62,7 +63,8 @@ errno_t raw_readwrite_read_mount( vnode_t psMountVnode, uint64_t uBlockN, uint64 if ( iReadBytes != (ssize_t)uBufLen ) { iErr = ( (iReadBytes < 0) ? errno : EIO ); - LFHFS_LOG( LEVEL_ERROR, "raw_readwrite_read_mount failed [%d]\n", iErr ); + HFSLogLevel_e eLogLevel = (VNODE_TO_UNMOUNT_HINT(psMountVnode)==UVFSUnmountHintForce)?LEVEL_DEBUG:LEVEL_ERROR; + LFHFS_LOG( eLogLevel, "raw_readwrite_read_mount failed [%d]\n", iErr ); } if (puActuallyRead) @@ -85,7 +87,8 @@ errno_t raw_readwrite_write_mount( vnode_t psMountVnode, uint64_t uBlockN, uint6 uActuallyWritten = pwrite(iFD, pvBuf, (size_t)uBufLen, uWantedOffset); if ( uActuallyWritten != (ssize_t)uBufLen ) { iErr = ( (uActuallyWritten < 0) ? errno : EIO ); - LFHFS_LOG( LEVEL_ERROR, "raw_readwrite_write_mount failed [%d]\n", iErr ); + HFSLogLevel_e eLogLevel = (VNODE_TO_UNMOUNT_HINT(psMountVnode)==UVFSUnmountHintForce)?LEVEL_DEBUG:LEVEL_ERROR; + LFHFS_LOG( eLogLevel, "raw_readwrite_write_mount failed [%d]\n", iErr ); } if (piActuallyWritten) diff --git a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c index ea0b44c..45d1eff 100644 --- a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c +++ b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.c @@ -753,3 +753,144 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags) return error; } + +/* + * Preallocate file storage space. 
+ */
+int
+hfs_vnop_preallocate(struct vnode * vp, LIFilePreallocateArgs_t* psPreAllocReq, LIFilePreallocateArgs_t* psPreAllocRes)
+{
+    struct cnode *cp = VTOC(vp);
+    struct filefork *fp = VTOF(vp);
+    struct hfsmount *hfsmp = VTOHFS(vp);
+    ExtendedVCB *vcb = VTOVCB(vp);
+    int retval = E_NONE , retval2 = E_NONE;
+
+    off_t length = psPreAllocReq->length;
+    psPreAllocRes->bytesallocated = 0;
+
+    if (vnode_isdir(vp) || vnode_islnk(vp)) {
+        LFHFS_LOG(LEVEL_ERROR, "hfs_vnop_preallocate: Cannot change size of a directory or symlink!");
+        return EPERM;
+    }
+
+    if (length == 0)
+        return (0);
+
+    if (psPreAllocReq->flags & LI_PREALLOCATE_ALLOCATEFROMVOL){
+        LFHFS_LOG(LEVEL_ERROR, "hfs_vnop_preallocate: Not supporting LI_PREALLOCATE_ALLOCATEFROMVOL mode\n");
+        return ENOTSUP;
+    }
+
+    hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
+        goto err_exit;
+    }
+
+    off_t filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+    off_t startingPEOF = filebytes;
+
+    /* If no changes are necessary, then we're done */
+    if (filebytes == length)
+        goto exit;
+
+    u_int32_t extendFlags = kEFNoClumpMask;
+    if (psPreAllocReq->flags & LI_PREALLOCATE_ALLOCATECONTIG)
+        extendFlags |= kEFContigMask;
+    if (psPreAllocReq->flags & LI_PREALLOCATE_ALLOCATEALL)
+        extendFlags |= kEFAllMask;
+
+
+    /*
+     * Lengthen the size of the file. We must ensure that the
+     * last byte of the file is allocated. Since the smallest
+     * value of filebytes is 0, length will be at least 1.
+     */
+    if (length > filebytes)
+    {
+        off_t total_bytes_added = 0, orig_request_size, moreBytesRequested, actualBytesAdded;
+        orig_request_size = moreBytesRequested = length - filebytes;
+
+        while ((length > filebytes) && (retval == E_NONE))
+        {
+            off_t bytesRequested;
+
+            if (hfs_start_transaction(hfsmp) != 0)
+            {
+                retval = EINVAL;
+                goto err_exit;
+            }
+
+            /* Protect extents b-tree and allocation bitmap */
+            int lockflags = SFL_BITMAP;
+            if (overflow_extents(fp))
+                lockflags |= SFL_EXTENTS;
+            lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+
+            if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
+                bytesRequested = HFS_BIGFILE_SIZE;
+            } else {
+                bytesRequested = moreBytesRequested;
+            }
+
+            retval = MacToVFSError(ExtendFileC(vcb,
+                                               (FCB*)fp,
+                                               bytesRequested,
+                                               0,
+                                               extendFlags,
+                                               &actualBytesAdded));
+
+            if (retval == E_NONE)
+            {
+                psPreAllocRes->bytesallocated += actualBytesAdded;
+                total_bytes_added += actualBytesAdded;
+                moreBytesRequested -= actualBytesAdded;
+            }
+
+            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
+            hfs_systemfile_unlock(hfsmp, lockflags);
+
+            if (hfsmp->jnl) {
+                (void) hfs_update(vp, 0);
+                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+            }
+
+            hfs_end_transaction(hfsmp);
+        }
+
+        /*
+         * if we get an error and no changes were made then exit
+         * otherwise we must do the hfs_update to reflect the changes
+         */
+        if (retval && (startingPEOF == filebytes))
+            goto err_exit;
+
+        /*
+         * Adjust actualBytesAdded to be allocation block aligned, not
+         * clump size aligned.
+         * NOTE: So what we are reporting does not affect reality
+         * until the file is closed, when we truncate the file to allocation
+         * block size.
+ */ + if (total_bytes_added != 0 && orig_request_size < total_bytes_added) + psPreAllocRes->bytesallocated = roundup(orig_request_size, (off_t)vcb->blockSize); + } else { + //No need to touch anything else, just unlock and go out + goto err_exit; + } + +exit: + cp->c_flag |= C_MODIFIED; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + retval2 = hfs_update(vp, 0); + + if (retval == 0) + retval = retval2; + +err_exit: + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + hfs_unlock(cp); + return (retval); +} diff --git a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.h b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.h index 491d31f..7a021f6 100644 --- a/livefiles_hfs_plugin/lf_hfs_readwrite_ops.h +++ b/livefiles_hfs_plugin/lf_hfs_readwrite_ops.h @@ -28,5 +28,6 @@ int hfs_vnop_blockmap(struct vnop_blockmap_args *ap); int hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp); int hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, struct filefork *rsrcfork, u_int32_t fileid); int hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags); +int hfs_vnop_preallocate(struct vnode * vp, LIFilePreallocateArgs_t* psPreAllocReq, LIFilePreallocateArgs_t* psPreAllocRes); #endif /* lf_hfs_readwrite_ops_h */ diff --git a/livefiles_hfs_plugin/lf_hfs_vfsops.c b/livefiles_hfs_plugin/lf_hfs_vfsops.c index 4dab9c8..4689cad 100644 --- a/livefiles_hfs_plugin/lf_hfs_vfsops.c +++ b/livefiles_hfs_plugin/lf_hfs_vfsops.c @@ -839,6 +839,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args) retval = hfs_CollectBtreeStats(hfsmp, vhp, embeddedOffset, args); free(vhp); + vhp = NULL; if ( retval ) { LFHFS_LOG(LEVEL_DEBUG, "hfs_mountfs: hfs_CollectBtreeStats encountered failure %d \n", retval); diff --git a/livefiles_hfs_plugin/lf_hfs_vnode.c b/livefiles_hfs_plugin/lf_hfs_vnode.c index 292a7a2..0bc73b6 100644 --- a/livefiles_hfs_plugin/lf_hfs_vnode.c +++ b/livefiles_hfs_plugin/lf_hfs_vnode.c @@ -11,6 +11,8 @@ #include "lf_hfs_vfsutils.h" #include "lf_hfs_generic_buf.h" #include "lf_hfs_fileops_handler.h" +#include "lf_hfs_xattr.h" +#include int VTtoUVFS_tab[16] = { @@ -166,8 +168,72 @@ void vnode_GetAttrInternal (vnode_t vp, UVFSFileAttributes *psOutAttr ) } else { - psOutAttr->fa_allocsize = VCTOF(vp, cp)->ff_blocks * VTOHFS(vp)->blockSize; - psOutAttr->fa_size = VCTOF(vp, cp)->ff_size; + if (psOutAttr->fa_bsd_flags & UF_COMPRESSED) + { + if (VNODE_IS_RSRC(vp)) + { + psOutAttr->fa_allocsize = VTOF(vp)->ff_blocks * VTOHFS(vp)->blockSize; + psOutAttr->fa_size = VTOF(vp)->ff_size; + } + else + { + hfs_unlock(VTOC(vp)); + void* data = NULL; + size_t attr_size; + int iErr = hfs_vnop_getxattr(vp, "com.apple.decmpfs", NULL, 0, &attr_size); + if (iErr != 0) { + goto fail; + } + + if (attr_size < sizeof(decmpfs_disk_header) || attr_size > MAX_DECMPFS_XATTR_SIZE) { + iErr = EINVAL; + goto fail; + } + /* allocation includes space for the extra attr_size field of a compressed_header */ + data = (char *) malloc(attr_size); + if (!data) { + iErr = ENOMEM; + goto fail; + } + + /* read the xattr into our buffer, skipping over the attr_size field at the beginning */ + size_t read_size; + iErr = hfs_vnop_getxattr(vp, "com.apple.decmpfs", data, attr_size, &read_size); + if (iErr != 0) { + goto fail; + } + if (read_size != attr_size) { + iErr = EINVAL; + goto fail; + } + + decmpfs_header Hdr; + Hdr.attr_size = (uint32_t) attr_size; + Hdr.compression_magic = *((uint32_t*)data); + Hdr.compression_type = *((uint32_t*)(data + sizeof(uint32_t))); + 
Hdr.uncompressed_size = *((uint32_t*)(data + sizeof(uint64_t))); + +fail: + if (iErr) + { + psOutAttr->fa_allocsize = VCTOF(vp, cp)->ff_blocks * VTOHFS(vp)->blockSize; + psOutAttr->fa_size = VCTOF(vp, cp)->ff_size; + } + else + { + psOutAttr->fa_allocsize = ROUND_UP(Hdr.uncompressed_size,VTOHFS(vp)->blockSize); + psOutAttr->fa_size = Hdr.uncompressed_size; + } + + if (data) free(data); + hfs_lock(VTOC(vp), 0, 0); + } + } + else + { + psOutAttr->fa_allocsize = VCTOF(vp, cp)->ff_blocks * VTOHFS(vp)->blockSize; + psOutAttr->fa_size = VCTOF(vp, cp)->ff_size; + } psOutAttr->fa_nlink = (cp->c_flag & C_HARDLINK)? cp->c_linkcount : 1; } } diff --git a/livefiles_hfs_plugin/lf_hfs_vnode.h b/livefiles_hfs_plugin/lf_hfs_vnode.h index ece2e07..d3afabe 100644 --- a/livefiles_hfs_plugin/lf_hfs_vnode.h +++ b/livefiles_hfs_plugin/lf_hfs_vnode.h @@ -12,18 +12,7 @@ #include #include "lf_hfs_common.h" - -/* - * Vnode types. VNON means no type. - */ -enum vtype { - /* 0 */ - VNON, - /* 1 - 5 */ - VREG, VDIR, VBLK, VCHR, VLNK, - /* 6 - 10 */ - VSOCK, VFIFO, VBAD, VSTR, VCPLX -}; +#include extern int VTtoUVFS_tab[]; diff --git a/livefiles_hfs_plugin/lf_hfs_vnops.c b/livefiles_hfs_plugin/lf_hfs_vnops.c index d040e97..a30b815 100644 --- a/livefiles_hfs_plugin/lf_hfs_vnops.c +++ b/livefiles_hfs_plugin/lf_hfs_vnops.c @@ -1202,7 +1202,8 @@ relock: hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); if (rvp) { - hfs_free(rvp); + hfs_chash_lower_OpenLookupCounter(cp); + rvp = NULL; } return (error); } @@ -1228,7 +1229,7 @@ relock: { /* We need to acquire the rsrc vnode */ rvp = cp->c_rsrc_vp; - + hfs_chash_raise_OpenLookupCounter(cp); /* Unlock everything to acquire iocount on the rsrc vnode */ hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); hfs_unlockpair (dcp, cp); @@ -1263,8 +1264,10 @@ rm_done: hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); if (rvp) - hfs_free(rvp); - + { + hfs_chash_lower_OpenLookupCounter(cp); + rvp = NULL; + } return (error); } @@ -2342,7 +2345,6 @@ retry: if (tvp_rsrc && tcp) { hfs_chash_lower_OpenLookupCounter(tcp); - hfs_free(tvp_rsrc); tvp_rsrc = NULL; } @@ -2399,7 +2401,6 @@ retry: took_trunc_lock = 0; } - hfs_unlockfour(fdcp, fcp, tdcp, tcp); goto retry; @@ -2760,7 +2761,7 @@ retry: if ((error == 0) && (tcp->c_flag & C_DELETED) && (tvp_rsrc)) { hfs_chash_lower_OpenLookupCounter(tcp); - hfs_free(tvp_rsrc); + tvp_rsrc = NULL; } } @@ -2947,7 +2948,6 @@ out: if (tvp_rsrc) { hfs_chash_lower_OpenLookupCounter(tcp); - hfs_free(tvp_rsrc); tvp_rsrc = NULL; } @@ -3171,3 +3171,175 @@ int hfs_removefile_callback(GenericLFBuf *psBuff, void *pvArgs) { return (0); } + +/* + * hfs_vgetrsrc acquires a resource fork vnode corresponding to the + * cnode that is found in 'vp'. The cnode should be locked upon entry + * and will be returned locked, but it may be dropped temporarily. + * + * If the resource fork vnode does not exist, HFS will attempt to acquire an + * empty (uninitialized) vnode from VFS so as to avoid deadlocks with + * jetsam. If we let the normal getnewvnode code produce the vnode for us + * we would be doing so while holding the cnode lock of our cnode. + * + * On success, *rvpp wlll hold the resource fork vnode with an + * iocount. *Don't* forget the vnode_put. 
+ */
+int
+hfs_vgetrsrc( struct vnode *vp, struct vnode **rvpp)
+{
+    struct hfsmount *hfsmp = VTOHFS(vp);
+    struct vnode *rvp = NULL;
+    struct cnode *cp = VTOC(vp);
+    int error = 0;
+
+restart:
+    /* Attempt to use existing vnode */
+    if ((rvp = cp->c_rsrc_vp)) {
+        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+        hfs_chash_raise_OpenLookupCounter(cp);
+
+    } else {
+        struct cat_fork rsrcfork;
+        struct cat_desc *descptr = NULL;
+        struct cat_desc to_desc;
+        int newvnode_flags = 0;
+
+        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
+
+        /*
+         * We could have raced with another thread here while we dropped our cnode
+         * lock. See if the cnode now has a resource fork vnode and restart if appropriate.
+         *
+         * Note: We just released the cnode lock, so there is a possibility that the
+         * cnode that we just acquired has been deleted or even removed from disk
+         * completely, though this is unlikely. If the file is open-unlinked, the
+         * check below will resolve it for us. If it has been completely
+         * removed (even from the catalog!), then when we examine the catalog
+         * directly, below, while holding the catalog lock, we will not find the
+         * item and we can fail out properly.
+         */
+        if (cp->c_rsrc_vp) {
+            /* Drop the empty vnode before restarting */
+            hfs_unlock(cp);
+            rvp = NULL;
+            goto restart;
+        }
+
+        /*
+         * hfs_vgetrsrc may be invoked for a cnode that has already been marked
+         * C_DELETED. This is because we need to continue to provide rsrc
+         * fork access to open-unlinked files. In this case, build a fake descriptor
+         * like in hfs_removefile. If we don't do this, buildkey will fail in
+         * cat_lookup because this cnode has no name in its descriptor.
+         */
+        if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) {
+            char delname[32];
+            bzero (&to_desc, sizeof(to_desc));
+            bzero (delname, 32);
+            MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid);
+            to_desc.cd_nameptr = (const u_int8_t*) delname;
+            to_desc.cd_namelen = strlen(delname);
+            to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
+            to_desc.cd_flags = 0;
+            to_desc.cd_cnid = cp->c_cnid;
+
+            descptr = &to_desc;
+        }
+        else {
+            descptr = &cp->c_desc;
+        }
+
+
+        int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+
+        /*
+         * We call cat_idlookup (instead of cat_lookup) below because we can't
+         * trust the descriptor in the provided cnode for lookups at this point.
+         * Between the time of the original lookup of this vnode and now, the
+         * descriptor could have gotten swapped or replaced. If this occurred,
+         * the parent/name combo originally desired may not necessarily be provided
+         * if we use the descriptor. Even worse, if the vnode represents
+         * a hardlink, we could have removed one of the links from the namespace
+         * but left the descriptor alone, since hfs_unlink does not invalidate
+         * the descriptor in the cnode if other links still point to the inode.
+         *
+         * Consider the following (slightly contrived) scenario:
+         * /tmp/a <--> /tmp/b (hardlinks).
+         * 1. Thread A: open rsrc fork on /tmp/b.
+         * 1a. Thread A: does lookup, goes out to lunch right before calling getnamedstream.
+         * 2. Thread B does 'mv /foo/b /tmp/b'
+         * 3. Thread B succeeds.
+         * 4. Thread A comes back and wants rsrc fork info for /tmp/b.
+         *
+         * Even though the hardlink backing /tmp/b is now eliminated, the descriptor
+         * is not removed/updated during the unlink process. So, if you were to
+         * do a lookup on /tmp/b, you'd acquire an entirely different record's resource
+         * fork.
+ * + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). + * + * Addendum: We can't do the above for HFS standard since we aren't guaranteed to + * have thread records for files. They were only required for directories. So + * we need to do the lookup with the catalog name. This is OK since hardlinks were + * never allowed on HFS standard. + */ + + /* Get resource fork data */ + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + LFHFS_LOG(LEVEL_ERROR, "hfs_vgetrsrc: cat_idlookup failed with error [%d]\n", error); + hfs_unlock(cp); + hfs_chash_lower_OpenLookupCounter(cp); + return (error); + } + /* + * Supply hfs_getnewvnode with a component name. + */ + struct componentname cn; + cn.cn_pnbuf = NULL; + if (descptr->cd_nameptr) { + void *buf = hfs_malloc(MAXPATHLEN); + + cn = (struct componentname){ + .cn_nameiop = LOOKUP, + .cn_flags = ISLASTCN, + .cn_pnlen = MAXPATHLEN, + .cn_pnbuf = buf, + .cn_nameptr = buf, + .cn_namelen = snprintf(buf, MAXPATHLEN, + "%s%s", descptr->cd_nameptr, + _PATH_RSRCFORKSPEC) + }; + + // Should never happen because cn.cn_nameptr won't ever be long... + if (cn.cn_namelen >= MAXPATHLEN) { + hfs_free(buf); + LFHFS_LOG(LEVEL_ERROR, "hfs_vgetrsrc: cnode name too long [ENAMETOOLONG]\n"); + hfs_unlock(cp); + hfs_chash_lower_OpenLookupCounter(cp); + return ENAMETOOLONG; + } + } + + /* + * We are about to call hfs_getnewvnode and pass in the vnode that we acquired + * earlier when we were not holding any locks. The semantics of GNV_USE_VP require that + * either hfs_getnewvnode consume the vnode and vend it back to us, properly initialized, + * or it will consume/dispose of it properly if it errors out. + */ + error = hfs_getnewvnode(hfsmp, NULL, cn.cn_pnbuf ? &cn : NULL, + descptr, (GNV_WANTRSRC | GNV_SKIPLOCK), + &cp->c_attr, &rsrcfork, &rvp, &newvnode_flags); + + hfs_free(cn.cn_pnbuf); + if (error) + return (error); + } /* End 'else' for rsrc fork not existing */ + + *rvpp = rvp; + return (0); +} diff --git a/livefiles_hfs_plugin/lf_hfs_vnops.h b/livefiles_hfs_plugin/lf_hfs_vnops.h index 6c1d1f6..5db2b8b 100644 --- a/livefiles_hfs_plugin/lf_hfs_vnops.h +++ b/livefiles_hfs_plugin/lf_hfs_vnops.h @@ -48,4 +48,6 @@ int hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cn int hfs_vnop_renamex(struct vnode *fdvp,struct vnode *fvp, struct componentname *fcnp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp); int hfs_vnop_link(vnode_t vp, vnode_t tdvp, struct componentname *cnp); int hfs_removefile_callback(GenericLFBuf *psBuff, void *pvArgs); + +int hfs_vgetrsrc( struct vnode *vp, struct vnode **rvpp); #endif /* lf_hfs_vnops_h */ diff --git a/make_opensource.sh b/make_opensource.sh deleted file mode 100755 index 5b40db1..0000000 --- a/make_opensource.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/sh - -# -# This script processes the directory hierarchy -# passed to it and eliminates all source code, -# makefile fragments, and documentation that is -# not suitable for open source posting. -# - -OPENSOURCE=1 - -DST=/tmp/hfs-open-source - -rm -rf $DST -mkdir $DST -xcodebuild installsrc SRCROOT=$DST - -SRCROOT="$DST" - -if [ ! 
-d "${SRCROOT}" ]; then - echo "Could not access ${SRCROOT}" 1>&2 - exit 1 -fi - - -UNIFDEF_FLAGS="" -if [ "$OPENSOURCE" -eq 1 ]; then - UNIFDEF_FLAGS="$UNIFDEF_FLAGS -D_OPEN_SOURCE_ -D__OPEN_SOURCE__ -U__arm__ -Uarm -UARM -U__ARM__ -U__arm64__ -Uarm64 -UARM64 -U__ARM64__ -UTARGET_OS_EMBEDDED -UHFS_CONFIG_KEY_ROLL" -fi - -# From this point forward, all paths are ./-relative -cd "${SRCROOT}" - -find -d . -name .open_source_exclude | while read f; do - dir=`dirname $f` - if [ -s $f ]; then - cat $f | while read g; do - if [ -n "$g" ]; then - echo "Removing $dir/$g (Listed in $f)" - rm -f "$dir/$g" || exit 1 - else - echo "Bad entry '$g' in $f" - exit 1 - fi - done - if [ $? -ne 0 ]; then - exit 1 - fi - else - echo "Removing $dir (Contains empty $f)" - rm -rf "$dir" - fi - rm -f "$f" -done - -if [ $? -ne 0 ]; then - # Propagate error from sub-shell invocation above - exit 1 -fi - -function stripfile() { - local extraflags="$1" - local path="$2" - - unifdef $extraflags $UNIFDEF_FLAGS $path > $path.new - if [ $? -eq 0 ]; then - # no change - rm $path.new - else - if [ $? -eq 2 ]; then - echo "Problems parsing $path, removing..." - rm $path.new $path - else - if [ -s $path.new ]; then - echo "Modified $path" - mv -f $path.new $path - else - echo "Removing empty $path" - rm -f $path.new $path - fi - fi - fi -} - -# C/C++ Source files -find . \( -type f -o -type l \) -a \( -name "*.[chy]" -o -name "*.cpp" \) | while read f; do - stripfile "" "$f" -done - -# Free-form plain text files -find . \( -type f -o -type l \) -a \( -name "*.[sS]" -o -name "*.sh" -o -name "README" -o -name "*.py" \) | while read f; do - stripfile "-t" "$f" - case "$f" in - *.sh) - chmod +x "$f" - ;; - esac -done - -# Remove project references -grep -i -v -E '(hfs_key_roll)' ./hfs.xcodeproj/project.pbxproj > ./hfs.xcodeproj/project.pbxproj.new -mv -f ./hfs.xcodeproj/project.pbxproj.new ./hfs.xcodeproj/project.pbxproj - -# Check for remaining bad file names -BADFILES=`find . \( -name "*.arm*" -o -name "arm*" \) | xargs echo`; -if [ -n "$BADFILES" ]; then - echo "Bad file names $BADFILES" - exit 1 -fi - -# Check for remaining bad file contents -if grep -iEr '([^UD_]_?_OPEN_SOURCE_|XNU_HIDE_SEED|XNU_HIDE_HARDWARE|CONFIG_EMBEDDED)' .; then - echo "cleanup FAILURE" - exit 1 -else - echo "done" - exit 0 -fi -- 2.45.2
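
For readers tracing the new core/rangelist.c code in this patch: the sketch below restates its six-way overlap classification as a small standalone C program that compiles and runs in user space. It is illustrative only and is not part of the patch; the enum values mirror core/rangelist.h, while the classify() helper, the sample ranges, and main() are hypothetical names introduced here for demonstration.

/*
 * Standalone sketch (not part of the hfs sources): the same six-way
 * range-overlap classification that core/rangelist.c performs in
 * rl_overlap(), restated on plain [start, end] pairs.
 */
#include <stdio.h>
#include <stdint.h>

enum rl_overlaptype {
    RL_NOOVERLAP = 0,           /* 0: ranges are disjoint                 */
    RL_MATCHINGOVERLAP,         /* 1: ranges are identical                */
    RL_OVERLAPCONTAINSRANGE,    /* 2: existing range contains the query   */
    RL_OVERLAPISCONTAINED,      /* 3: query contains the existing range   */
    RL_OVERLAPSTARTSBEFORE,     /* 4: existing range straddles query start */
    RL_OVERLAPENDSAFTER         /* 5: existing range straddles query end  */
};

/* Same decision order as rl_overlap(): disjoint, equal, containing,
 * contained, straddling on the left, straddling on the right. */
static enum rl_overlaptype
classify(int64_t r_start, int64_t r_end, int64_t start, int64_t end)
{
    if (start > r_end || r_start > end)   return RL_NOOVERLAP;
    if (r_start == start && r_end == end) return RL_MATCHINGOVERLAP;
    if (r_start <= start && r_end >= end) return RL_OVERLAPCONTAINSRANGE;
    if (start <= r_start && end >= r_end) return RL_OVERLAPISCONTAINED;
    if (r_start < start && r_end < end)   return RL_OVERLAPSTARTSBEFORE;
    return RL_OVERLAPENDSAFTER;
}

int main(void)
{
    /* An existing range [100, 199] probed with one query per case. */
    printf("%d\n", classify(100, 199, 300, 400)); /* 0: no overlap       */
    printf("%d\n", classify(100, 199, 100, 199)); /* 1: matching         */
    printf("%d\n", classify(100, 199, 150, 160)); /* 2: contains range   */
    printf("%d\n", classify(100, 199,  50, 250)); /* 3: is contained     */
    printf("%d\n", classify(100, 199, 150, 250)); /* 4: starts before    */
    printf("%d\n", classify(100, 199,  50, 150)); /* 5: ends after       */
    return 0;
}

The order of the tests matters: disjointness is ruled out first, then exact match, then containment in both directions, so only the two straddling cases remain at the end. This is also why rl_subtract() in the patch can key its trimming decisions directly off the returned case.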