git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2009 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/* @(#)hfs_vfsutils.c 4.0
	29	*
	30	* (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
	31	*
	32	* hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
	33	*
	34	*/
	35	#include <sys/param.h>
	36	#include <sys/systm.h>
	37	#include <sys/kernel.h>
	38	#include <sys/malloc.h>
	39	#include <sys/stat.h>
	40	#include <sys/mount.h>
	41	#include <sys/mount_internal.h>
	42	#include <sys/buf.h>
	43	#include <sys/buf_internal.h>
	44	#include <sys/ubc.h>
	45	#include <sys/unistd.h>
	46	#include <sys/utfconv.h>
	47	#include <sys/kauth.h>
	48	#include <sys/fcntl.h>
	49	#include <sys/vnode_internal.h>
	50	#include <kern/clock.h>
	51
	52	#include <libkern/OSAtomic.h>
	53
	54	#include "hfs.h"
	55	#include "hfs_catalog.h"
	56	#include "hfs_dbg.h"
	57	#include "hfs_mount.h"
	58	#include "hfs_endian.h"
	59	#include "hfs_cnode.h"
	60	#include "hfs_fsctl.h"
	61
	62	#include "hfscommon/headers/FileMgrInternal.h"
	63	#include "hfscommon/headers/BTreesInternal.h"
	64	#include "hfscommon/headers/HFSUnicodeWrappers.h"
	65
	66	static void ReleaseMetaFileVNode(struct vnode *vp);
	67	static int hfs_late_journal_init(struct hfsmount hfsmp, HFSPlusVolumeHeader vhp, void *_args);
	68
	69	static void hfs_metadatazone_init(struct hfsmount *);
	70	static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
	71
	72
	73	//*******************************************************************************
	74	// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
	75	// hence is not in the right byte order on little endian machines. It is
	76	// the responsibility of the Finder and other clients to swap the data.
	77	//*******************************************************************************
	78
	79	//*******************************************************************************
	80	// Routine: hfs_MountHFSVolume
	81	//
	82	//
	83	//*******************************************************************************
	84	unsigned char hfs_catname[] = "Catalog B-tree";
	85	unsigned char hfs_extname[] = "Extents B-tree";
	86	unsigned char hfs_vbmname[] = "Volume Bitmap";
	87	unsigned char hfs_attrname[] = "Attribute B-tree";
	88	unsigned char hfs_startupname[] = "Startup File";
	89
	90
	91	__private_extern__
	92	OSErr hfs_MountHFSVolume(struct hfsmount hfsmp, HFSMasterDirectoryBlock mdb,
	93	__unused struct proc *p)
	94	{
	95	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	96	int error;
	97	ByteCount utf8chars;
	98	struct cat_desc cndesc;
	99	struct cat_attr cnattr;
	100	struct cat_fork fork;
	101
	102	/* Block size must be a multiple of 512 */
	103	if (SWAP_BE32(mdb->drAlBlkSiz) == 0 \|\|
	104	(SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
	105	return (EINVAL);
	106
	107	/* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
	108	if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
	109	((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
	110	return (EINVAL);
	111	}
	112	hfsmp->hfs_flags \|= HFS_STANDARD;
	113	/*
	114	* The MDB seems OK: transfer info from it into VCB
	115	* Note - the VCB starts out clear (all zeros)
	116	*
	117	*/
	118	vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
	119	vcb->vcbCrDate = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
	120	vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
	121	vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
	122	vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
	123	vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
	124	vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
	125	vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
	126	vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
	127	vcb->allocLimit = vcb->totalBlocks;
	128	vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
	129	vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
	130	vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
	131	vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
	132	vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
	133	vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
	134	vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
	135	vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
	136	vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
	137	vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
	138	bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
	139	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
	140	vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */
	141
	142	/* convert hfs encoded name into UTF-8 string */
	143	error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
	144	/*
	145	* When an HFS name cannot be encoded with the current
	146	* volume encoding we use MacRoman as a fallback.
	147	*/
	148	if (error \|\| (utf8chars == 0))
	149	(void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
	150
	151	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
	152	vcb->vcbVBMIOSize = kHFSBlockSize;
	153
	154	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
	155	hfsmp->hfs_logical_block_count);
	156
	157	bzero(&cndesc, sizeof(cndesc));
	158	cndesc.cd_parentcnid = kHFSRootParentID;
	159	cndesc.cd_flags \|= CD_ISMETA;
	160	bzero(&cnattr, sizeof(cnattr));
	161	cnattr.ca_linkcount = 1;
	162	cnattr.ca_mode = S_IFREG;
	163	bzero(&fork, sizeof(fork));
	164
	165	/*
	166	* Set up Extents B-tree vnode
	167	*/
	168	cndesc.cd_nameptr = hfs_extname;
	169	cndesc.cd_namelen = strlen((char *)hfs_extname);
	170	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
	171	fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
	172	fork.cf_blocks = fork.cf_size / vcb->blockSize;
	173	fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
	174	fork.cf_vblocks = 0;
	175	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
	176	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
	177	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
	178	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
	179	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
	180	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
	181	cnattr.ca_blocks = fork.cf_blocks;
	182
	183	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	184	&hfsmp->hfs_extents_vp);
	185	if (error) goto MtVolErr;
	186	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
	187	(KeyCompareProcPtr)CompareExtentKeys));
	188	if (error) {
	189	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
	190	goto MtVolErr;
	191	}
	192	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
	193
	194	/*
	195	* Set up Catalog B-tree vnode...
	196	*/
	197	cndesc.cd_nameptr = hfs_catname;
	198	cndesc.cd_namelen = strlen((char *)hfs_catname);
	199	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
	200	fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
	201	fork.cf_blocks = fork.cf_size / vcb->blockSize;
	202	fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
	203	fork.cf_vblocks = 0;
	204	fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
	205	fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
	206	fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
	207	fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
	208	fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
	209	fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
	210	cnattr.ca_blocks = fork.cf_blocks;
	211
	212	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	213	&hfsmp->hfs_catalog_vp);
	214	if (error) {
	215	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
	216	goto MtVolErr;
	217	}
	218	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
	219	(KeyCompareProcPtr)CompareCatalogKeys));
	220	if (error) {
	221	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
	222	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
	223	goto MtVolErr;
	224	}
	225	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
	226
	227	/*
	228	* Set up dummy Allocation file vnode (used only for locking bitmap)
	229	*/
	230	cndesc.cd_nameptr = hfs_vbmname;
	231	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
	232	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
	233	bzero(&fork, sizeof(fork));
	234	cnattr.ca_blocks = 0;
	235
	236	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
	237	&hfsmp->hfs_allocation_vp);
	238	if (error) {
	239	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
	240	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
	241	goto MtVolErr;
	242	}
	243	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
	244
	245	/* mark the volume dirty (clear clean unmount bit) */
	246	vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
	247
	248	if (error == noErr)
	249	{
	250	error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
	251	}
	252
	253	if ( error == noErr )
	254	{
	255	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
	256	{
	257	MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
	258	}
	259	}
	260
	261	/*
	262	* all done with system files so we can unlock now...
	263	*/
	264	hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
	265	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
	266	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
	267
	268	goto CmdDone;
	269
	270	//-- Release any resources allocated so far before exiting with an error:
	271	MtVolErr:
	272	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
	273	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
	274
	275	CmdDone:
	276	return (error);
	277	}
	278
	279	//*******************************************************************************
	280	// Routine: hfs_MountHFSPlusVolume
	281	//
	282	//
	283	//*******************************************************************************
	284
	285	__private_extern__
	286	OSErr hfs_MountHFSPlusVolume(struct hfsmount hfsmp, HFSPlusVolumeHeader vhp,
	287	off_t embeddedOffset, u_int64_t disksize, __unused struct proc p, void args, kauth_cred_t cred)
	288	{
	289	register ExtendedVCB *vcb;
	290	struct cat_desc cndesc;
	291	struct cat_attr cnattr;
	292	struct cat_fork cfork;
	293	u_int32_t blockSize;
	294	daddr64_t spare_sectors;
	295	struct BTreeInfoRec btinfo;
	296	u_int16_t signature;
	297	u_int16_t hfs_version;
	298	int i;
	299	OSErr retval;
	300
	301	signature = SWAP_BE16(vhp->signature);
	302	hfs_version = SWAP_BE16(vhp->version);
	303
	304	if (signature == kHFSPlusSigWord) {
	305	if (hfs_version != kHFSPlusVersion) {
	306	printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
	307	return (EINVAL);
	308	}
	309	} else if (signature == kHFSXSigWord) {
	310	if (hfs_version != kHFSXVersion) {
	311	printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
	312	return (EINVAL);
	313	}
	314	/* The in-memory signature is always 'H+'. */
	315	signature = kHFSPlusSigWord;
	316	hfsmp->hfs_flags \|= HFS_X;
	317	} else {
	318	/* Removed printf for invalid HFS+ signature because it gives
	319	* false error for UFS root volume
	320	*/
	321	return (EINVAL);
	322	}
	323
	324	/* Block size must be at least 512 and a power of 2 */
	325	blockSize = SWAP_BE32(vhp->blockSize);
	326	if (blockSize < 512 \|\| !powerof2(blockSize))
	327	return (EINVAL);
	328
	329	/* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
	330	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
	331	(SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0)
	332	return (EINVAL);
	333
	334	/* Make sure we can live with the physical block size. */
	335	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) \|\|
	336	(embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) \|\|
	337	(blockSize < hfsmp->hfs_logical_block_size)) {
	338	return (ENXIO);
	339	}
	340
	341	/* If allocation block size is less than the physical
	342	* block size, we assume that the physical block size
	343	* is same as logical block size. The physical block
	344	* size value is used to round down the offsets for
	345	* reading and writing the primary and alternate volume
	346	* headers at physical block boundary and will cause
	347	* problems if it is less than the block size.
	348	*/
	349	if (blockSize < hfsmp->hfs_physical_block_size) {
	350	hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
	351	hfsmp->hfs_log_per_phys = 1;
	352	}
	353
	354	/*
	355	* The VolumeHeader seems OK: transfer info from it into VCB
	356	* Note - the VCB starts out clear (all zeros)
	357	*/
	358	vcb = HFSTOVCB(hfsmp);
	359
	360	vcb->vcbSigWord = signature;
	361	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
	362	vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
	363	vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
	364	vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
	365	vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
	366	vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
	367	vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
	368	vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
	369	vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);
	370
	371	/* copy 32 bytes of Finder info */
	372	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	373
	374	vcb->vcbAlBlSt = 0; /* hfs+ allocation blocks start at first block of volume */
	375	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
	376	vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */
	377
	378	/* Now fill in the Extended VCB info */
	379	vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
	380	vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
	381	vcb->allocLimit = vcb->totalBlocks;
	382	vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
	383	vcb->blockSize = blockSize;
	384	vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
	385	vcb->localCreateDate = SWAP_BE32(vhp->createDate);
	386
	387	vcb->hfsPlusIOPosOffset = embeddedOffset;
	388
	389	/* Default to no free block reserve */
	390	vcb->reserveBlocks = 0;
	391
	392	/*
	393	* Update the logical block size in the mount struct
	394	* (currently set up from the wrapper MDB) using the
	395	* new blocksize value:
	396	*/
	397	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
	398	vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
	399
	400	/*
	401	* Validate and initialize the location of the alternate volume header.
	402	*/
	403	spare_sectors = hfsmp->hfs_logical_block_count -
	404	(((daddr64_t)vcb->totalBlocks * blockSize) /
	405	hfsmp->hfs_logical_block_size);
	406
	407	if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
	408	hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */
	409	} else {
	410	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
	411	HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
	412	hfsmp->hfs_logical_block_count);
	413	}
	414
	415	bzero(&cndesc, sizeof(cndesc));
	416	cndesc.cd_parentcnid = kHFSRootParentID;
	417	cndesc.cd_flags \|= CD_ISMETA;
	418	bzero(&cnattr, sizeof(cnattr));
	419	cnattr.ca_linkcount = 1;
	420	cnattr.ca_mode = S_IFREG;
	421
	422	/*
	423	* Set up Extents B-tree vnode
	424	*/
	425	cndesc.cd_nameptr = hfs_extname;
	426	cndesc.cd_namelen = strlen((char *)hfs_extname);
	427	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
	428
	429	cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
	430	cfork.cf_new_size= 0;
	431	cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
	432	cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	433	cfork.cf_vblocks = 0;
	434	cnattr.ca_blocks = cfork.cf_blocks;
	435	for (i = 0; i < kHFSPlusExtentDensity; i++) {
	436	cfork.cf_extents[i].startBlock =
	437	SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
	438	cfork.cf_extents[i].blockCount =
	439	SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	440	}
	441	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	442	&hfsmp->hfs_extents_vp);
	443	if (retval)
	444	{
	445	goto ErrorExit;
	446	}
	447	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
	448	hfs_unlock(hfsmp->hfs_extents_cp);
	449
	450	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
	451	(KeyCompareProcPtr) CompareExtentKeysPlus));
	452	if (retval)
	453	{
	454	goto ErrorExit;
	455	}
	456	/*
	457	* Set up Catalog B-tree vnode
	458	*/
	459	cndesc.cd_nameptr = hfs_catname;
	460	cndesc.cd_namelen = strlen((char *)hfs_catname);
	461	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
	462
	463	cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
	464	cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
	465	cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	466	cfork.cf_vblocks = 0;
	467	cnattr.ca_blocks = cfork.cf_blocks;
	468	for (i = 0; i < kHFSPlusExtentDensity; i++) {
	469	cfork.cf_extents[i].startBlock =
	470	SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
	471	cfork.cf_extents[i].blockCount =
	472	SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	473	}
	474	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	475	&hfsmp->hfs_catalog_vp);
	476	if (retval) {
	477	goto ErrorExit;
	478	}
	479	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
	480	hfs_unlock(hfsmp->hfs_catalog_cp);
	481
	482	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
	483	(KeyCompareProcPtr) CompareExtendedCatalogKeys));
	484	if (retval) {
	485	goto ErrorExit;
	486	}
	487	if ((hfsmp->hfs_flags & HFS_X) &&
	488	BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
	489	if (btinfo.keyCompareType == kHFSBinaryCompare) {
	490	hfsmp->hfs_flags \|= HFS_CASE_SENSITIVE;
	491	/* Install a case-sensitive key compare */
	492	(void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
	493	(KeyCompareProcPtr)cat_binarykeycompare);
	494	}
	495	}
	496
	497	/*
	498	* Set up Allocation file vnode
	499	*/
	500	cndesc.cd_nameptr = hfs_vbmname;
	501	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
	502	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
	503
	504	cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
	505	cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
	506	cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	507	cfork.cf_vblocks = 0;
	508	cnattr.ca_blocks = cfork.cf_blocks;
	509	for (i = 0; i < kHFSPlusExtentDensity; i++) {
	510	cfork.cf_extents[i].startBlock =
	511	SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
	512	cfork.cf_extents[i].blockCount =
	513	SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	514	}
	515	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	516	&hfsmp->hfs_allocation_vp);
	517	if (retval) {
	518	goto ErrorExit;
	519	}
	520	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
	521	hfs_unlock(hfsmp->hfs_allocation_cp);
	522
	523	/*
	524	* Set up Attribute B-tree vnode
	525	*/
	526	if (vhp->attributesFile.totalBlocks != 0) {
	527	cndesc.cd_nameptr = hfs_attrname;
	528	cndesc.cd_namelen = strlen((char *)hfs_attrname);
	529	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
	530
	531	cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
	532	cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
	533	cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
	534	cfork.cf_vblocks = 0;
	535	cnattr.ca_blocks = cfork.cf_blocks;
	536	for (i = 0; i < kHFSPlusExtentDensity; i++) {
	537	cfork.cf_extents[i].startBlock =
	538	SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
	539	cfork.cf_extents[i].blockCount =
	540	SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
	541	}
	542	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	543	&hfsmp->hfs_attribute_vp);
	544	if (retval) {
	545	goto ErrorExit;
	546	}
	547	hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
	548	hfs_unlock(hfsmp->hfs_attribute_cp);
	549	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
	550	(KeyCompareProcPtr) hfs_attrkeycompare));
	551	if (retval) {
	552	goto ErrorExit;
	553	}
	554	}
	555
	556	/*
	557	* Set up Startup file vnode
	558	*/
	559	if (vhp->startupFile.totalBlocks != 0) {
	560	cndesc.cd_nameptr = hfs_startupname;
	561	cndesc.cd_namelen = strlen((char *)hfs_startupname);
	562	cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
	563
	564	cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
	565	cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
	566	cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
	567	cfork.cf_vblocks = 0;
	568	cnattr.ca_blocks = cfork.cf_blocks;
	569	for (i = 0; i < kHFSPlusExtentDensity; i++) {
	570	cfork.cf_extents[i].startBlock =
	571	SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
	572	cfork.cf_extents[i].blockCount =
	573	SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
	574	}
	575	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	576	&hfsmp->hfs_startup_vp);
	577	if (retval) {
	578	goto ErrorExit;
	579	}
	580	hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
	581	hfs_unlock(hfsmp->hfs_startup_cp);
	582	}
	583
	584	/* Pick up volume name and create date */
	585	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
	586	if (retval) {
	587	goto ErrorExit;
	588	}
	589	vcb->vcbCrDate = cnattr.ca_itime;
	590	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	591	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	592	cat_releasedesc(&cndesc);
	593
	594	/* mark the volume dirty (clear clean unmount bit) */
	595	vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
	596	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
	597	hfs_flushvolumeheader(hfsmp, TRUE, 0);
	598	}
	599
	600	/* kHFSHasFolderCount is only supported/updated on HFSX volumes */
	601	if ((hfsmp->hfs_flags & HFS_X) != 0) {
	602	hfsmp->hfs_flags \|= HFS_FOLDERCOUNT;
	603	}
	604
	605	//
	606	// Check if we need to do late journal initialization. This only
	607	// happens if a previous version of MacOS X (or 9) touched the disk.
	608	// In that case hfs_late_journal_init() will go re-locate the journal
	609	// and journal_info_block files and validate that they're still kosher.
	610	//
	611	if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask)
	612	&& (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
	613	&& (hfsmp->jnl == NULL)) {
	614
	615	retval = hfs_late_journal_init(hfsmp, vhp, args);
	616	if (retval != 0) {
	617	if (retval == EROFS) {
	618	// EROFS is a special error code that means the volume has an external
	619	// journal which we couldn't find. in that case we do not want to
	620	// rewrite the volume header - we'll just refuse to mount the volume.
	621	retval = EINVAL;
	622	goto ErrorExit;
	623	}
	624
	625	hfsmp->jnl = NULL;
	626
	627	// if the journal failed to open, then set the lastMountedVersion
	628	// to be "FSK!" which fsck_hfs will see and force the fsck instead
	629	// of just bailing out because the volume is journaled.
	630	if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
	631	HFSPlusVolumeHeader *jvhp;
	632	daddr64_t mdb_offset;
	633	struct buf *bp = NULL;
	634
	635	hfsmp->hfs_flags \|= HFS_NEED_JNL_RESET;
	636
	637	mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
	638
	639	bp = NULL;
	640	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
	641	HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
	642	hfsmp->hfs_physical_block_size, cred, &bp);
	643	if (retval == 0) {
	644	jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
	645
	646	if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord \|\| SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
	647	printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n");
	648	jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
	649	buf_bwrite(bp);
	650	} else {
	651	buf_brelse(bp);
	652	}
	653	bp = NULL;
	654	} else if (bp) {
	655	buf_brelse(bp);
	656	// clear this so the error exit path won't try to use it
	657	bp = NULL;
	658	}
	659	}
	660
	661	retval = EINVAL;
	662	goto ErrorExit;
	663	} else if (hfsmp->jnl) {
	664	vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
	665	}
	666	} else if (hfsmp->jnl \|\| ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
	667	struct cat_attr jinfo_attr, jnl_attr;
	668
	669	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
	670	vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
	671	}
	672
	673	// if we're here we need to fill in the fileid's for the
	674	// journal and journal_info_block.
	675	hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
	676	hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
	677	if (hfsmp->hfs_jnlinfoblkid == 0 \|\| hfsmp->hfs_jnlfileid == 0) {
	678	printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
	679	printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
	680	}
	681
	682	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
	683	vcb->vcbAtrb \|= kHFSVolumeJournaledMask;
	684	}
	685
	686	if (hfsmp->jnl == NULL) {
	687	vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
	688	}
	689	}
	690
	691	/*
	692	* Establish a metadata allocation zone.
	693	*/
	694	hfs_metadatazone_init(hfsmp);
	695
	696	/*
	697	* Make any metadata zone adjustments.
	698	*/
	699	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
	700	/* Keep the roving allocator out of the metadata zone. */
	701	if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
	702	vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
	703	HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
	704	}
	705	} else {
	706	if (vcb->nextAllocation <= 1) {
	707	vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
	708	}
	709	}
	710	vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
	711
	712	/* Setup private/hidden directories for hardlinks. */
	713	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	714	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
	715
	716	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
	717	hfs_remove_orphans(hfsmp);
	718
	719	/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
	720	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
	721	{
	722	retval = hfs_erase_unused_nodes(hfsmp);
	723	if (retval)
	724	goto ErrorExit;
	725	}
	726
	727	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
	728	{
	729	MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
	730	}
	731
	732	/*
	733	* Allow hot file clustering if conditions allow.
	734	*/
	735	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
	736	((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) {
	737	(void) hfs_recording_init(hfsmp);
	738	}
	739
	740	/* Force ACLs on HFS+ file systems. */
	741	vfs_setextendedsecurity(HFSTOVFS(hfsmp));
	742
	743	/* Check if volume supports writing of extent-based extended attributes */
	744	hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);
	745
	746	return (0);
	747
	748	ErrorExit:
	749	/*
	750	* A fatal error occurred and the volume cannot be mounted
	751	* release any resources that we aquired...
	752	*/
	753	if (hfsmp->hfs_attribute_vp)
	754	ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
	755	ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
	756	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
	757	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
	758
	759	return (retval);
	760	}
	761
	762
	763	/*
	764	* ReleaseMetaFileVNode
	765	*
	766	* vp L - -
	767	*/
	768	static void ReleaseMetaFileVNode(struct vnode *vp)
	769	{
	770	struct filefork *fp;
	771
	772	if (vp && (fp = VTOF(vp))) {
	773	if (fp->fcbBTCBPtr != NULL) {
	774	(void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
	775	(void) BTClosePath(fp);
	776	hfs_unlock(VTOC(vp));
	777	}
	778
	779	/* release the node even if BTClosePath fails */
	780	vnode_recycle(vp);
	781	vnode_put(vp);
	782	}
	783	}
	784
	785
	786	/*************************************************************
	787	*
	788	* Unmounts a hfs volume.
	789	* At this point vflush() has been called (to dump all non-metadata files)
	790	*
	791	*************************************************************/
	792
	793	__private_extern__
	794	int
	795	hfsUnmount( register struct hfsmount hfsmp, __unused struct proc p)
	796	{
	797	/* Get rid of our attribute data vnode (if any). */
	798	if (hfsmp->hfs_attrdata_vp) {
	799	vnode_t advp = hfsmp->hfs_attrdata_vp;
	800
	801	if (vnode_get(advp) == 0) {
	802	vnode_rele_ext(advp, O_EVTONLY, 0);
	803	vnode_put(advp);
	804	}
	805	hfsmp->hfs_attrdata_vp = NULLVP;
	806	}
	807
	808	if (hfsmp->hfs_startup_vp)
	809	ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
	810
	811	if (hfsmp->hfs_allocation_vp)
	812	ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
	813
	814	if (hfsmp->hfs_attribute_vp)
	815	ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
	816
	817	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
	818	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
	819
	820	/*
	821	* Setting these pointers to NULL so that any references
	822	* past this point will fail, and tell us the point of failure.
	823	* Also, facilitates a check in hfs_update for a null catalog
	824	* vp
	825	*/
	826	hfsmp->hfs_allocation_vp = NULL;
	827	hfsmp->hfs_attribute_vp = NULL;
	828	hfsmp->hfs_catalog_vp = NULL;
	829	hfsmp->hfs_extents_vp = NULL;
	830	hfsmp->hfs_startup_vp = NULL;
	831
	832	return (0);
	833	}
	834
	835
	836	/*
	837	* Test if fork has overflow extents.
	838	*/
	839	__private_extern__
	840	int
	841	overflow_extents(struct filefork *fp)
	842	{
	843	u_int32_t blocks;
	844
	845	//
	846	// If the vnode pointer is NULL then we're being called
	847	// from hfs_remove_orphans() with a faked-up filefork
	848	// and therefore it has to be an HFS+ volume. Otherwise
	849	// we check through the volume header to see what type
	850	// of volume we're on.
	851	//
	852	if (FTOV(fp) == NULL \|\| VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
	853	if (fp->ff_extents[7].blockCount == 0)
	854	return (0);
	855
	856	blocks = fp->ff_extents[0].blockCount +
	857	fp->ff_extents[1].blockCount +
	858	fp->ff_extents[2].blockCount +
	859	fp->ff_extents[3].blockCount +
	860	fp->ff_extents[4].blockCount +
	861	fp->ff_extents[5].blockCount +
	862	fp->ff_extents[6].blockCount +
	863	fp->ff_extents[7].blockCount;
	864	} else {
	865	if (fp->ff_extents[2].blockCount == 0)
	866	return false;
	867
	868	blocks = fp->ff_extents[0].blockCount +
	869	fp->ff_extents[1].blockCount +
	870	fp->ff_extents[2].blockCount;
	871	}
	872
	873	return (fp->ff_blocks > blocks);
	874	}
	875
	876
	877	/*
	878	* Lock HFS system file(s).
	879	*/
	880	__private_extern__
	881	int
	882	hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
	883	{
	884	/*
	885	* Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
	886	*/
	887	if (flags & SFL_CATALOG) {
	888
	889	#ifdef HFS_CHECK_LOCK_ORDER
	890	if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
	891	panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
	892	}
	893	if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
	894	panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
	895	}
	896	if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
	897	panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
	898	}
	899	#endif /* HFS_CHECK_LOCK_ORDER */
	900
	901	(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
	902	/*
	903	* When the catalog file has overflow extents then
	904	* also acquire the extents b-tree lock if its not
	905	* already requested.
	906	*/
	907	if ((flags & SFL_EXTENTS) == 0 &&
	908	overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
	909	flags \|= SFL_EXTENTS;
	910	}
	911	}
	912	if (flags & SFL_ATTRIBUTE) {
	913
	914	#ifdef HFS_CHECK_LOCK_ORDER
	915	if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
	916	panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
	917	}
	918	if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
	919	panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
	920	}
	921	#endif /* HFS_CHECK_LOCK_ORDER */
	922
	923	if (hfsmp->hfs_attribute_cp) {
	924	(void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
	925	/*
	926	* When the attribute file has overflow extents then
	927	* also acquire the extents b-tree lock if its not
	928	* already requested.
	929	*/
	930	if ((flags & SFL_EXTENTS) == 0 &&
	931	overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
	932	flags \|= SFL_EXTENTS;
	933	}
	934	} else {
	935	flags &= ~SFL_ATTRIBUTE;
	936	}
	937	}
	938	if (flags & SFL_STARTUP) {
	939	#ifdef HFS_CHECK_LOCK_ORDER
	940	if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
	941	panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
	942	}
	943	#endif /* HFS_CHECK_LOCK_ORDER */
	944
	945	(void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
	946	/*
	947	* When the startup file has overflow extents then
	948	* also acquire the extents b-tree lock if its not
	949	* already requested.
	950	*/
	951	if ((flags & SFL_EXTENTS) == 0 &&
	952	overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
	953	flags \|= SFL_EXTENTS;
	954	}
	955	}
	956	/*
	957	* To prevent locks being taken in the wrong order, the extent lock
	958	* gets a bitmap lock as well.
	959	*/
	960	if (flags & (SFL_BITMAP \| SFL_EXTENTS)) {
	961	/*
	962	* Since the only bitmap operations are clearing and
	963	* setting bits we always need exclusive access. And
	964	* when we have a journal, we can "hide" behind that
	965	* lock since we can only change the bitmap from
	966	* within a transaction.
	967	*/
	968	if (hfsmp->jnl \|\| (hfsmp->hfs_allocation_cp == NULL)) {
	969	flags &= ~SFL_BITMAP;
	970	} else {
	971	(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
	972	/* The bitmap lock is also grabbed when only extent lock
	973	* was requested. Set the bitmap lock bit in the lock
	974	* flags which callers will use during unlock.
	975	*/
	976	flags \|= SFL_BITMAP;
	977	}
	978	}
	979	if (flags & SFL_EXTENTS) {
	980	/*
	981	* Since the extents btree lock is recursive we always
	982	* need exclusive access.
	983	*/
	984	(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
	985	}
	986	return (flags);
	987	}
	988
	989	/*
	990	* unlock HFS system file(s).
	991	*/
	992	__private_extern__
	993	void
	994	hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
	995	{
	996	struct timeval tv;
	997	u_int32_t lastfsync;
	998	int numOfLockedBuffs;
	999
	1000	if (hfsmp->jnl == NULL) {
	1001	microuptime(&tv);
	1002	lastfsync = tv.tv_sec;
	1003	}
	1004	if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
	1005	hfs_unlock(hfsmp->hfs_startup_cp);
	1006	}
	1007	if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
	1008	if (hfsmp->jnl == NULL) {
	1009	BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
	1010	numOfLockedBuffs = count_lock_queue();
	1011	if ((numOfLockedBuffs > kMaxLockedMetaBuffers) \|\|
	1012	((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
	1013	kMaxSecsForFsync))) {
	1014	hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
	1015	}
	1016	}
	1017	hfs_unlock(hfsmp->hfs_attribute_cp);
	1018	}
	1019	if (flags & SFL_CATALOG) {
	1020	if (hfsmp->jnl == NULL) {
	1021	BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
	1022	numOfLockedBuffs = count_lock_queue();
	1023	if ((numOfLockedBuffs > kMaxLockedMetaBuffers) \|\|
	1024	((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
	1025	kMaxSecsForFsync))) {
	1026	hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
	1027	}
	1028	}
	1029	hfs_unlock(hfsmp->hfs_catalog_cp);
	1030	}
	1031	if (flags & SFL_BITMAP) {
	1032	hfs_unlock(hfsmp->hfs_allocation_cp);
	1033	}
	1034	if (flags & SFL_EXTENTS) {
	1035	if (hfsmp->jnl == NULL) {
	1036	BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
	1037	numOfLockedBuffs = count_lock_queue();
	1038	if ((numOfLockedBuffs > kMaxLockedMetaBuffers) \|\|
	1039	((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
	1040	kMaxSecsForFsync))) {
	1041	hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
	1042	}
	1043	}
	1044	hfs_unlock(hfsmp->hfs_extents_cp);
	1045	}
	1046	}
	1047
	1048
	1049	/*
	1050	* RequireFileLock
	1051	*
	1052	* Check to see if a vnode is locked in the current context
	1053	* This is to be used for debugging purposes only!!
	1054	*/
	1055	#if HFS_DIAGNOSTIC
	1056	void RequireFileLock(FileReference vp, int shareable)
	1057	{
	1058	int locked;
	1059
	1060	/* The extents btree and allocation bitmap are always exclusive. */
	1061	if (VTOC(vp)->c_fileid == kHFSExtentsFileID \|\|
	1062	VTOC(vp)->c_fileid == kHFSAllocationFileID) {
	1063	shareable = 0;
	1064	}
	1065
	1066	locked = VTOC(vp)->c_lockowner == (void *)current_thread();
	1067
	1068	if (!locked && !shareable) {
	1069	switch (VTOC(vp)->c_fileid) {
	1070	case kHFSExtentsFileID:
	1071	panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
	1072	break;
	1073	case kHFSCatalogFileID:
	1074	panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
	1075	break;
	1076	case kHFSAllocationFileID:
	1077	/* The allocation file can hide behind the jornal lock. */
	1078	if (VTOHFS(vp)->jnl == NULL)
	1079	panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
	1080	break;
	1081	case kHFSStartupFileID:
	1082	panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
	1083	case kHFSAttributesFileID:
	1084	panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
	1085	break;
	1086	}
	1087	}
	1088	}
	1089	#endif
	1090
	1091
	1092	/*
	1093	* There are three ways to qualify for ownership rights on an object:
	1094	*
	1095	* 1. (a) Your UID matches the cnode's UID.
	1096	* (b) The object in question is owned by "unknown"
	1097	* 2. (a) Permissions on the filesystem are being ignored and
	1098	* your UID matches the replacement UID.
	1099	* (b) Permissions on the filesystem are being ignored and
	1100	* the replacement UID is "unknown".
	1101	* 3. You are root.
	1102	*
	1103	*/
	1104	int
	1105	hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
	1106	__unused struct proc *p, int invokesuperuserstatus)
	1107	{
	1108	if ((kauth_cred_getuid(cred) == cnode_uid) \|\| /* [1a] */
	1109	(cnode_uid == UNKNOWNUID) \|\| /* [1b] */
	1110	((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
	1111	((kauth_cred_getuid(cred) == hfsmp->hfs_uid) \|\| /* [2a] */
	1112	(hfsmp->hfs_uid == UNKNOWNUID))) \|\| /* [2b] */
	1113	(invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
	1114	return (0);
	1115	} else {
	1116	return (EPERM);
	1117	}
	1118	}
	1119
	1120
	1121	u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
	1122	u_int32_t blockSizeLimit,
	1123	u_int32_t baseMultiple) {
	1124	/*
	1125	Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
	1126	specified limit but still an even multiple of the baseMultiple.
	1127	*/
	1128	int baseBlockCount, blockCount;
	1129	u_int32_t trialBlockSize;
	1130
	1131	if (allocationBlockSize % baseMultiple != 0) {
	1132	/*
	1133	Whoops: the allocation blocks aren't even multiples of the specified base:
	1134	no amount of dividing them into even parts will be a multiple, either then!
	1135	*/
	1136	return 512; /* Hope for the best */
	1137	};
	1138
	1139	/* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
	1140	from being handled as two 6K logical blocks instead of 3 4K logical blocks.
	1141	Even though the former (the result of the loop below) is the larger allocation
	1142	block size, the latter is more efficient: */
	1143	if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
	1144
	1145	/* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
	1146	baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
	1147
	1148	for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
	1149	trialBlockSize = blockCount * baseMultiple;
	1150	if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
	1151	if ((trialBlockSize <= blockSizeLimit) &&
	1152	(trialBlockSize % baseMultiple == 0)) {
	1153	return trialBlockSize;
	1154	};
	1155	};
	1156	};
	1157
	1158	/* Note: we should never get here, since blockCount = 1 should always work,
	1159	but this is nice and safe and makes the compiler happy, too ... */
	1160	return 512;
	1161	}
	1162
	1163
	1164	__private_extern__
	1165	u_int32_t
	1166	GetFileInfo(ExtendedVCB vcb, __unused u_int32_t dirid, const char name,
	1167	struct cat_attr fattr, struct cat_fork forkinfo)
	1168	{
	1169	struct hfsmount * hfsmp;
	1170	struct cat_desc jdesc;
	1171	int lockflags;
	1172	int error;
	1173
	1174	if (vcb->vcbSigWord != kHFSPlusSigWord)
	1175	return (0);
	1176
	1177	hfsmp = VCBTOHFS(vcb);
	1178
	1179	memset(&jdesc, 0, sizeof(struct cat_desc));
	1180	jdesc.cd_parentcnid = kRootDirID;
	1181	jdesc.cd_nameptr = (const u_int8_t *)name;
	1182	jdesc.cd_namelen = strlen(name);
	1183
	1184	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	1185	error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
	1186	hfs_systemfile_unlock(hfsmp, lockflags);
	1187
	1188	if (error == 0) {
	1189	return (fattr->ca_fileid);
	1190	} else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
	1191	return (0);
	1192	}
	1193
	1194	return (0); /* XXX what callers expect on an error */
	1195	}
	1196
	1197
	1198	/*
	1199	* On HFS Plus Volumes, there can be orphaned files or directories
	1200	* These are files or directories that were unlinked while busy.
	1201	* If the volume was not cleanly unmounted then some of these may
	1202	* have persisted and need to be removed.
	1203	*/
	1204	__private_extern__
	1205	void
	1206	hfs_remove_orphans(struct hfsmount * hfsmp)
	1207	{
	1208	struct BTreeIterator * iterator = NULL;
	1209	struct FSBufferDescriptor btdata;
	1210	struct HFSPlusCatalogFile filerec;
	1211	struct HFSPlusCatalogKey * keyp;
	1212	struct proc *p = current_proc();
	1213	FCB *fcb;
	1214	ExtendedVCB *vcb;
	1215	char filename[32];
	1216	char tempname[32];
	1217	size_t namelen;
	1218	cat_cookie_t cookie;
	1219	int catlock = 0;
	1220	int catreserve = 0;
	1221	int started_tr = 0;
	1222	int lockflags;
	1223	int result;
	1224	int orphaned_files = 0;
	1225	int orphaned_dirs = 0;
	1226
	1227	bzero(&cookie, sizeof(cookie));
	1228
	1229	if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
	1230	return;
	1231
	1232	vcb = HFSTOVCB(hfsmp);
	1233	fcb = VTOF(hfsmp->hfs_catalog_vp);
	1234
	1235	btdata.bufferAddress = &filerec;
	1236	btdata.itemSize = sizeof(filerec);
	1237	btdata.itemCount = 1;
	1238
	1239	MALLOC(iterator, struct BTreeIterator , sizeof(iterator), M_TEMP, M_WAITOK);
	1240	bzero(iterator, sizeof(*iterator));
	1241
	1242	/* Build a key to "temp" */
	1243	keyp = (HFSPlusCatalogKey*)&iterator->key;
	1244	keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
	1245	keyp->nodeName.length = 4; /* "temp" */
	1246	keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
	1247	keyp->nodeName.unicode[0] = 't';
	1248	keyp->nodeName.unicode[1] = 'e';
	1249	keyp->nodeName.unicode[2] = 'm';
	1250	keyp->nodeName.unicode[3] = 'p';
	1251
	1252	/*
	1253	* Position the iterator just before the first real temp file/dir.
	1254	*/
	1255	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	1256	(void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
	1257	hfs_systemfile_unlock(hfsmp, lockflags);
	1258
	1259	/* Visit all the temp files/dirs in the HFS+ private directory. */
	1260	for (;;) {
	1261	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	1262	result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	1263	hfs_systemfile_unlock(hfsmp, lockflags);
	1264	if (result)
	1265	break;
	1266	if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
	1267	break;
	1268
	1269	(void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
	1270	(u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
	1271
	1272	(void) snprintf(tempname, sizeof(tempname), "%s%d",
	1273	HFS_DELETE_PREFIX, filerec.fileID);
	1274
	1275	/*
	1276	* Delete all files (and directories) named "tempxxx",
	1277	* where xxx is the file's cnid in decimal.
	1278	*
	1279	*/
	1280	if (bcmp(tempname, filename, namelen) == 0) {
	1281	struct filefork dfork;
	1282	struct filefork rfork;
	1283	struct cnode cnode;
	1284
	1285	bzero(&dfork, sizeof(dfork));
	1286	bzero(&rfork, sizeof(rfork));
	1287	bzero(&cnode, sizeof(cnode));
	1288
	1289	/* Delete any attributes, ignore errors */
	1290	(void) hfs_removeallattr(hfsmp, filerec.fileID);
	1291
	1292	if (hfs_start_transaction(hfsmp) != 0) {
	1293	printf("hfs_remove_orphans: failed to start transaction\n");
	1294	goto exit;
	1295	}
	1296	started_tr = 1;
	1297
	1298	/*
	1299	* Reserve some space in the Catalog file.
	1300	*/
	1301	if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
	1302	printf("hfs_remove_orphans: cat_preflight failed\n");
	1303	goto exit;
	1304	}
	1305	catreserve = 1;
	1306
	1307	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG \| SFL_ATTRIBUTE \| SFL_EXTENTS \| SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	1308	catlock = 1;
	1309
	1310	/* Build a fake cnode */
	1311	cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
	1312	&dfork.ff_data, &rfork.ff_data);
	1313	cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
	1314	cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
	1315	cnode.c_desc.cd_namelen = namelen;
	1316	cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
	1317	cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
	1318
	1319	/* Position iterator at previous entry */
	1320	if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
	1321	NULL, NULL) != 0) {
	1322	break;
	1323	}
	1324
	1325	/* Truncate the file to zero (both forks) */
	1326	if (dfork.ff_blocks > 0) {
	1327	u_int64_t fsize;
	1328
	1329	dfork.ff_cp = &cnode;
	1330	cnode.c_datafork = &dfork;
	1331	cnode.c_rsrcfork = NULL;
	1332	fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
	1333	while (fsize > 0) {
	1334	if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
	1335	fsize -= HFS_BIGFILE_SIZE;
	1336	} else {
	1337	fsize = 0;
	1338	}
	1339
	1340	if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
	1341	printf("hfs: error truncting data fork!\n");
	1342	break;
	1343	}
	1344
	1345	//
	1346	// if we're iteratively truncating this file down,
	1347	// then end the transaction and start a new one so
	1348	// that no one transaction gets too big.
	1349	//
	1350	if (fsize > 0 && started_tr) {
	1351	/* Drop system file locks before starting
	1352	* another transaction to preserve lock order.
	1353	*/
	1354	hfs_systemfile_unlock(hfsmp, lockflags);
	1355	catlock = 0;
	1356	hfs_end_transaction(hfsmp);
	1357
	1358	if (hfs_start_transaction(hfsmp) != 0) {
	1359	started_tr = 0;
	1360	break;
	1361	}
	1362	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG \| SFL_ATTRIBUTE \| SFL_EXTENTS \| SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	1363	catlock = 1;
	1364	}
	1365	}
	1366	}
	1367
	1368	if (rfork.ff_blocks > 0) {
	1369	rfork.ff_cp = &cnode;
	1370	cnode.c_datafork = NULL;
	1371	cnode.c_rsrcfork = &rfork;
	1372	if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
	1373	printf("hfs: error truncting rsrc fork!\n");
	1374	break;
	1375	}
	1376	}
	1377
	1378	/* Remove the file or folder record from the Catalog */
	1379	if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
	1380	printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
	1381	hfs_systemfile_unlock(hfsmp, lockflags);
	1382	catlock = 0;
	1383	hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	1384	break;
	1385	}
	1386
	1387	if (cnode.c_attr.ca_mode & S_IFDIR) {
	1388	orphaned_dirs++;
	1389	}
	1390	else {
	1391	orphaned_files++;
	1392	}
	1393
	1394	/* Update parent and volume counts */
	1395	hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
	1396	if (cnode.c_attr.ca_mode & S_IFDIR) {
	1397	DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
	1398	}
	1399
	1400	(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
	1401	&hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
	1402
	1403	/* Drop locks and end the transaction */
	1404	hfs_systemfile_unlock(hfsmp, lockflags);
	1405	cat_postflight(hfsmp, &cookie, p);
	1406	catlock = catreserve = 0;
	1407
	1408	/*
	1409	Now that Catalog is unlocked, update the volume info, making
	1410	sure to differentiate between files and directories
	1411	*/
	1412	if (cnode.c_attr.ca_mode & S_IFDIR) {
	1413	hfs_volupdate(hfsmp, VOL_RMDIR, 0);
	1414	}
	1415	else{
	1416	hfs_volupdate(hfsmp, VOL_RMFILE, 0);
	1417	}
	1418
	1419	if (started_tr) {
	1420	hfs_end_transaction(hfsmp);
	1421	started_tr = 0;
	1422	}
	1423
	1424	} /* end if */
	1425	} /* end for */
	1426	if (orphaned_files > 0 \|\| orphaned_dirs > 0)
	1427	printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
	1428	exit:
	1429	if (catlock) {
	1430	hfs_systemfile_unlock(hfsmp, lockflags);
	1431	}
	1432	if (catreserve) {
	1433	cat_postflight(hfsmp, &cookie, p);
	1434	}
	1435	if (started_tr) {
	1436	hfs_end_transaction(hfsmp);
	1437	}
	1438
	1439	FREE(iterator, M_TEMP);
	1440	hfsmp->hfs_flags \|= HFS_CLEANED_ORPHANS;
	1441	}
	1442
	1443
	1444	/*
	1445	* This will return the correct logical block size for a given vnode.
	1446	* For most files, it is the allocation block size, for meta data like
	1447	* BTrees, this is kept as part of the BTree private nodeSize
	1448	*/
	1449	u_int32_t
	1450	GetLogicalBlockSize(struct vnode *vp)
	1451	{
	1452	u_int32_t logBlockSize;
	1453
	1454	DBG_ASSERT(vp != NULL);
	1455
	1456	/* start with default */
	1457	logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
	1458
	1459	if (vnode_issystem(vp)) {
	1460	if (VTOF(vp)->fcbBTCBPtr != NULL) {
	1461	BTreeInfoRec bTreeInfo;
	1462
	1463	/*
	1464	* We do not lock the BTrees, because if we are getting block..then the tree
	1465	* should be locked in the first place.
	1466	* We just want the nodeSize wich will NEVER change..so even if the world
	1467	* is changing..the nodeSize should remain the same. Which argues why lock
	1468	* it in the first place??
	1469	*/
	1470
	1471	(void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
	1472
	1473	logBlockSize = bTreeInfo.nodeSize;
	1474
	1475	} else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
	1476	logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
	1477	}
	1478	}
	1479
	1480	DBG_ASSERT(logBlockSize > 0);
	1481
	1482	return logBlockSize;
	1483	}
	1484
	1485	__private_extern__
	1486	u_int32_t
	1487	hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
	1488	{
	1489	u_int32_t freeblks;
	1490	u_int32_t rsrvblks;
	1491	u_int32_t loanblks;
	1492
	1493	/*
	1494	* We don't bother taking the mount lock
	1495	* to look at these values since the values
	1496	* themselves are each updated automically
	1497	* on aligned addresses.
	1498	*/
	1499	freeblks = hfsmp->freeBlocks;
	1500	rsrvblks = hfsmp->reserveBlocks;
	1501	loanblks = hfsmp->loanedBlocks;
	1502	if (wantreserve) {
	1503	if (freeblks > rsrvblks)
	1504	freeblks -= rsrvblks;
	1505	else
	1506	freeblks = 0;
	1507	}
	1508	if (freeblks > loanblks)
	1509	freeblks -= loanblks;
	1510	else
	1511	freeblks = 0;
	1512
	1513	#ifdef HFS_SPARSE_DEV
	1514	/*
	1515	* When the underlying device is sparse, check the
	1516	* available space on the backing store volume.
	1517	*/
	1518	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
	1519	struct vfsstatfs vfsp; / 272 bytes */
	1520	u_int64_t vfreeblks;
	1521	u_int32_t loanedblks;
	1522	struct mount * backingfs_mp;
	1523	struct timeval now;
	1524
	1525	backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
	1526
	1527	microtime(&now);
	1528	if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
	1529	vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
	1530	hfsmp->hfs_last_backingstatfs = now.tv_sec;
	1531	}
	1532
	1533	if ((vfsp = vfs_statfs(backingfs_mp))) {
	1534	HFS_MOUNT_LOCK(hfsmp, TRUE);
	1535	vfreeblks = vfsp->f_bavail;
	1536	/* Normalize block count if needed. */
	1537	if (vfsp->f_bsize != hfsmp->blockSize) {
	1538	vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
	1539	}
	1540	if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
	1541	vfreeblks -= hfsmp->hfs_sparsebandblks;
	1542	else
	1543	vfreeblks = 0;
	1544
	1545	/* Take into account any delayed allocations. */
	1546	loanedblks = 2 * hfsmp->loanedBlocks;
	1547	if (vfreeblks > loanedblks)
	1548	vfreeblks -= loanedblks;
	1549	else
	1550	vfreeblks = 0;
	1551
	1552	if (hfsmp->hfs_backingfs_maxblocks) {
	1553	vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
	1554	}
	1555	freeblks = MIN(vfreeblks, freeblks);
	1556	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	1557	}
	1558	}
	1559	#endif /* HFS_SPARSE_DEV */
	1560
	1561	return (freeblks);
	1562	}
	1563
	1564	/*
	1565	* Map HFS Common errors (negative) to BSD error codes (positive).
	1566	* Positive errors (ie BSD errors) are passed through unchanged.
	1567	*/
	1568	short MacToVFSError(OSErr err)
	1569	{
	1570	if (err >= 0)
	1571	return err;
	1572
	1573	switch (err) {
	1574	case dskFulErr: /* -34 */
	1575	case btNoSpaceAvail: /* -32733 */
	1576	return ENOSPC;
	1577	case fxOvFlErr: /* -32750 */
	1578	return EOVERFLOW;
	1579
	1580	case btBadNode: /* -32731 */
	1581	return EIO;
	1582
	1583	case memFullErr: /* -108 */
	1584	return ENOMEM; /* +12 */
	1585
	1586	case cmExists: /* -32718 */
	1587	case btExists: /* -32734 */
	1588	return EEXIST; /* +17 */
	1589
	1590	case cmNotFound: /* -32719 */
	1591	case btNotFound: /* -32735 */
	1592	return ENOENT; /* 28 */
	1593
	1594	case cmNotEmpty: /* -32717 */
	1595	return ENOTEMPTY; /* 66 */
	1596
	1597	case cmFThdDirErr: /* -32714 */
	1598	return EISDIR; /* 21 */
	1599
	1600	case fxRangeErr: /* -32751 */
	1601	return ERANGE;
	1602
	1603	case bdNamErr: /* -37 */
	1604	return ENAMETOOLONG; /* 63 */
	1605
	1606	case paramErr: /* -50 */
	1607	case fileBoundsErr: /* -1309 */
	1608	return EINVAL; /* +22 */
	1609
	1610	case fsBTBadNodeSize:
	1611	return ENXIO;
	1612
	1613	default:
	1614	return EIO; /* +5 */
	1615	}
	1616	}
	1617
	1618
	1619	/*
	1620	* Find the current thread's directory hint for a given index.
	1621	*
	1622	* Requires an exclusive lock on directory cnode.
	1623	*
	1624	* Use detach if the cnode lock must be dropped while the hint is still active.
	1625	*/
	1626	__private_extern__
	1627	directoryhint_t *
	1628	hfs_getdirhint(struct cnode *dcp, int index, int detach)
	1629	{
	1630	struct timeval tv;
	1631	directoryhint_t *hint;
	1632	boolean_t need_remove, need_init;
	1633	const u_int8_t * name;
	1634
	1635	microuptime(&tv);
	1636
	1637	/*
	1638	* Look for an existing hint first. If not found, create a new one (when
	1639	* the list is not full) or recycle the oldest hint. Since new hints are
	1640	* always added to the head of the list, the last hint is always the
	1641	* oldest.
	1642	*/
	1643	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
	1644	if (hint->dh_index == index)
	1645	break;
	1646	}
	1647	if (hint != NULL) { /* found an existing hint */
	1648	need_init = false;
	1649	need_remove = true;
	1650	} else { /* cannot find an existing hint */
	1651	need_init = true;
	1652	if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
	1653	/* Create a default directory hint */
	1654	MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
	1655	++dcp->c_dirhintcnt;
	1656	need_remove = false;
	1657	} else { /* recycle the last (i.e., the oldest) hint */
	1658	hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
	1659	if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
	1660	(name = hint->dh_desc.cd_nameptr)) {
	1661	hint->dh_desc.cd_nameptr = NULL;
	1662	hint->dh_desc.cd_namelen = 0;
	1663	hint->dh_desc.cd_flags &= ~CD_HASBUF;
	1664	vfs_removename((const char *)name);
	1665	}
	1666	need_remove = true;
	1667	}
	1668	}
	1669
	1670	if (need_remove)
	1671	TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
	1672
	1673	if (detach)
	1674	--dcp->c_dirhintcnt;
	1675	else
	1676	TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
	1677
	1678	if (need_init) {
	1679	hint->dh_index = index;
	1680	hint->dh_desc.cd_flags = 0;
	1681	hint->dh_desc.cd_encoding = 0;
	1682	hint->dh_desc.cd_namelen = 0;
	1683	hint->dh_desc.cd_nameptr = NULL;
	1684	hint->dh_desc.cd_parentcnid = dcp->c_fileid;
	1685	hint->dh_desc.cd_hint = dcp->c_childhint;
	1686	hint->dh_desc.cd_cnid = 0;
	1687	}
	1688	hint->dh_time = tv.tv_sec;
	1689	return (hint);
	1690	}
	1691
	1692	/*
	1693	* Release a single directory hint.
	1694	*
	1695	* Requires an exclusive lock on directory cnode.
	1696	*/
	1697	__private_extern__
	1698	void
	1699	hfs_reldirhint(struct cnode dcp, directoryhint_t relhint)
	1700	{
	1701	const u_int8_t * name;
	1702	directoryhint_t *hint;
	1703
	1704	/* Check if item is on list (could be detached) */
	1705	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
	1706	if (hint == relhint) {
	1707	TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
	1708	--dcp->c_dirhintcnt;
	1709	break;
	1710	}
	1711	}
	1712	name = relhint->dh_desc.cd_nameptr;
	1713	if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
	1714	relhint->dh_desc.cd_nameptr = NULL;
	1715	relhint->dh_desc.cd_namelen = 0;
	1716	relhint->dh_desc.cd_flags &= ~CD_HASBUF;
	1717	vfs_removename((const char *)name);
	1718	}
	1719	FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
	1720	}
	1721
	1722	/*
	1723	* Release directory hints for given directory
	1724	*
	1725	* Requires an exclusive lock on directory cnode.
	1726	*/
	1727	__private_extern__
	1728	void
	1729	hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
	1730	{
	1731	struct timeval tv;
	1732	directoryhint_t hint, prev;
	1733	const u_int8_t * name;
	1734
	1735	if (stale_hints_only)
	1736	microuptime(&tv);
	1737
	1738	/* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
	1739	for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
	1740	if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
	1741	break; /* stop here if this entry is too new */
	1742	name = hint->dh_desc.cd_nameptr;
	1743	if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
	1744	hint->dh_desc.cd_nameptr = NULL;
	1745	hint->dh_desc.cd_namelen = 0;
	1746	hint->dh_desc.cd_flags &= ~CD_HASBUF;
	1747	vfs_removename((const char *)name);
	1748	}
	1749	prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
	1750	TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
	1751	FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
	1752	--dcp->c_dirhintcnt;
	1753	}
	1754	}
	1755
	1756	/*
	1757	* Insert a detached directory hint back into the list of dirhints.
	1758	*
	1759	* Requires an exclusive lock on directory cnode.
	1760	*/
	1761	__private_extern__
	1762	void
	1763	hfs_insertdirhint(struct cnode dcp, directoryhint_t hint)
	1764	{
	1765	directoryhint_t *test;
	1766
	1767	TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
	1768	if (test == hint)
	1769	panic("hfs_insertdirhint: hint %p already on list!", hint);
	1770	}
	1771
	1772	TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
	1773	++dcp->c_dirhintcnt;
	1774	}
	1775
	1776	/*
	1777	* Perform a case-insensitive compare of two UTF-8 filenames.
	1778	*
	1779	* Returns 0 if the strings match.
	1780	*/
	1781	__private_extern__
	1782	int
	1783	hfs_namecmp(const u_int8_t str1, size_t len1, const u_int8_t str2, size_t len2)
	1784	{
	1785	u_int16_t ustr1, ustr2;
	1786	size_t ulen1, ulen2;
	1787	size_t maxbytes;
	1788	int cmp = -1;
	1789
	1790	if (len1 != len2)
	1791	return (cmp);
	1792
	1793	maxbytes = kHFSPlusMaxFileNameChars << 1;
	1794	MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
	1795	ustr2 = ustr1 + (maxbytes >> 1);
	1796
	1797	if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
	1798	goto out;
	1799	if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
	1800	goto out;
	1801
	1802	cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
	1803	out:
	1804	FREE(ustr1, M_TEMP);
	1805	return (cmp);
	1806	}
	1807
	1808
	1809	typedef struct jopen_cb_info {
	1810	off_t jsize;
	1811	char *desired_uuid;
	1812	struct vnode *jvp;
	1813	size_t blksize;
	1814	int need_clean;
	1815	int need_init;
	1816	} jopen_cb_info;
	1817
	1818	static int
	1819	journal_open_cb(const char bsd_dev_name, const char uuid_str, void *arg)
	1820	{
	1821	struct nameidata nd;
	1822	jopen_cb_info ji = (jopen_cb_info )arg;
	1823	char bsd_name[256];
	1824	int error;
	1825
	1826	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
	1827	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
	1828
	1829	if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
	1830	return 1; // keep iterating
	1831	}
	1832
	1833	// if we're here, either the desired uuid matched or there was no
	1834	// desired uuid so let's try to open the device for writing and
	1835	// see if it works. if it does, we'll use it.
	1836
	1837	NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
	1838	if ((error = namei(&nd))) {
	1839	printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
	1840	return 1; // keep iterating
	1841	}
	1842
	1843	ji->jvp = nd.ni_vp;
	1844	nameidone(&nd);
	1845
	1846	if (ji->jvp == NULL) {
	1847	printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
	1848	} else {
	1849	error = VNOP_OPEN(ji->jvp, FREAD\|FWRITE, vfs_context_kernel());
	1850	if (error == 0) {
	1851	// if the journal is dirty and we didn't specify a desired
	1852	// journal device uuid, then do not use the journal. but
	1853	// if the journal is just invalid (e.g. it hasn't been
	1854	// initialized) then just set the need_init flag.
	1855	if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
	1856	error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
	1857	if (error == EBUSY) {
	1858	VNOP_CLOSE(ji->jvp, FREAD\|FWRITE, vfs_context_kernel());
	1859	vnode_put(ji->jvp);
	1860	ji->jvp = NULL;
	1861	return 1; // keep iterating
	1862	} else if (error == EINVAL) {
	1863	ji->need_init = 1;
	1864	}
	1865	}
	1866
	1867	if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
	1868	strlcpy(ji->desired_uuid, uuid_str, 128);
	1869	}
	1870	vnode_setmountedon(ji->jvp);
	1871	// printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
	1872	return 0; // stop iterating
	1873	} else {
	1874	vnode_put(ji->jvp);
	1875	ji->jvp = NULL;
	1876	}
	1877	}
	1878
	1879	return 1; // keep iterating
	1880	}
	1881
	1882	extern dev_t IOBSDGetMediaWithUUID(const char uuid_cstring, char bsd_name, int bsd_name_len, int timeout);
	1883	extern void IOBSDIterateMediaWithContent(const char uuid_cstring, int (func)(const char bsd_dev_name, const char uuid_str, void arg), void arg);
	1884	extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
	1885	kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
	1886
	1887
	1888	static vnode_t
	1889	open_journal_dev(const char *vol_device,
	1890	int need_clean,
	1891	char *uuid_str,
	1892	char *machine_serial_num,
	1893	off_t jsize,
	1894	size_t blksize,
	1895	int *need_init)
	1896	{
	1897	int retry_counter=0;
	1898	jopen_cb_info ji;
	1899
	1900	ji.jsize = jsize;
	1901	ji.desired_uuid = uuid_str;
	1902	ji.jvp = NULL;
	1903	ji.blksize = blksize;
	1904	ji.need_clean = need_clean;
	1905	ji.need_init = 0;
	1906
	1907	// if (uuid_str[0] == '\0') {
	1908	// printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
	1909	// } else {
	1910	// printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
	1911	// }
	1912	while (ji.jvp == NULL && retry_counter++ < 4) {
	1913	if (retry_counter > 1) {
	1914	if (uuid_str[0]) {
	1915	printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
	1916	} else {
	1917	printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
	1918	}
	1919	delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
	1920	}
	1921
	1922	IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
	1923	}
	1924
	1925	if (ji.jvp == NULL) {
	1926	printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
	1927	vol_device, uuid_str, machine_serial_num);
	1928	}
	1929
	1930	*need_init = ji.need_init;
	1931
	1932	return ji.jvp;
	1933	}
	1934
	1935
	1936	__private_extern__
	1937	int
	1938	hfs_early_journal_init(struct hfsmount hfsmp, HFSPlusVolumeHeader vhp,
	1939	void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
	1940	HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
	1941	{
	1942	JournalInfoBlock *jibp;
	1943	struct buf jinfo_bp, bp;
	1944	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	1945	int retval, write_jibp = 0;
	1946	uint32_t blksize = hfsmp->hfs_logical_block_size;
	1947	struct vnode *devvp;
	1948	struct hfs_mount_args *args = _args;
	1949	u_int32_t jib_flags;
	1950	u_int64_t jib_offset;
	1951	u_int64_t jib_size;
	1952	const char *dev_name;
	1953
	1954	devvp = hfsmp->hfs_devvp;
	1955	dev_name = vnode_name(devvp);
	1956	if (dev_name == NULL) {
	1957	dev_name = "unknown-dev";
	1958	}
	1959
	1960	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
	1961	arg_flags = args->journal_flags;
	1962	arg_tbufsz = args->journal_tbuffer_size;
	1963	}
	1964
	1965	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
	1966
	1967	jinfo_bp = NULL;
	1968	retval = (int)buf_meta_bread(devvp,
	1969	(daddr64_t)((embeddedOffset/blksize) +
	1970	((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
	1971	hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
	1972	if (retval) {
	1973	if (jinfo_bp) {
	1974	buf_brelse(jinfo_bp);
	1975	}
	1976	return retval;
	1977	}
	1978
	1979	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	1980	jib_flags = SWAP_BE32(jibp->flags);
	1981	jib_size = SWAP_BE64(jibp->size);
	1982
	1983	if (jib_flags & kJIJournalInFSMask) {
	1984	hfsmp->jvp = hfsmp->hfs_devvp;
	1985	jib_offset = SWAP_BE64(jibp->offset);
	1986	} else {
	1987	int need_init=0;
	1988
	1989	// if the volume was unmounted cleanly then we'll pick any
	1990	// available external journal partition
	1991	//
	1992	if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
	1993	((char )&jibp->ext_jnl_uuid[0]) = '\0';
	1994	}
	1995
	1996	hfsmp->jvp = open_journal_dev(dev_name,
	1997	!(jib_flags & kJIJournalNeedInitMask),
	1998	(char *)&jibp->ext_jnl_uuid[0],
	1999	(char *)&jibp->machine_serial_num[0],
	2000	jib_size,
	2001	hfsmp->hfs_logical_block_size,
	2002	&need_init);
	2003	if (hfsmp->jvp == NULL) {
	2004	buf_brelse(jinfo_bp);
	2005	return EROFS;
	2006	} else {
	2007	if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
	2008	strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
	2009	}
	2010	}
	2011
	2012	jib_offset = 0;
	2013	write_jibp = 1;
	2014	if (need_init) {
	2015	jib_flags \|= kJIJournalNeedInitMask;
	2016	}
	2017	}
	2018
	2019	// save this off for the hack-y check in hfs_remove()
	2020	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	2021	hfsmp->jnl_size = jib_size;
	2022
	2023	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
	2024	// if the file system is read-only, check if the journal is empty.
	2025	// if it is, then we can allow the mount. otherwise we have to
	2026	// return failure.
	2027	retval = journal_is_clean(hfsmp->jvp,
	2028	jib_offset + embeddedOffset,
	2029	jib_size,
	2030	devvp,
	2031	hfsmp->hfs_logical_block_size);
	2032
	2033	hfsmp->jnl = NULL;
	2034
	2035	buf_brelse(jinfo_bp);
	2036
	2037	if (retval) {
	2038	const char *name = vnode_getname(devvp);
	2039	printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
	2040	name ? name : "");
	2041	if (name)
	2042	vnode_putname(name);
	2043	}
	2044
	2045	return retval;
	2046	}
	2047
	2048	if (jib_flags & kJIJournalNeedInitMask) {
	2049	printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
	2050	jib_offset + embeddedOffset, jib_size);
	2051	hfsmp->jnl = journal_create(hfsmp->jvp,
	2052	jib_offset + embeddedOffset,
	2053	jib_size,
	2054	devvp,
	2055	blksize,
	2056	arg_flags,
	2057	arg_tbufsz,
	2058	hfs_sync_metadata, hfsmp->hfs_mp);
	2059
	2060	// no need to start a transaction here... if this were to fail
	2061	// we'd just re-init it on the next mount.
	2062	jib_flags &= ~kJIJournalNeedInitMask;
	2063	jibp->flags = SWAP_BE32(jib_flags);
	2064	buf_bwrite(jinfo_bp);
	2065	jinfo_bp = NULL;
	2066	jibp = NULL;
	2067	} else {
	2068	//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
	2069	// jib_offset + embeddedOffset,
	2070	// jib_size, SWAP_BE32(vhp->blockSize));
	2071
	2072	hfsmp->jnl = journal_open(hfsmp->jvp,
	2073	jib_offset + embeddedOffset,
	2074	jib_size,
	2075	devvp,
	2076	blksize,
	2077	arg_flags,
	2078	arg_tbufsz,
	2079	hfs_sync_metadata, hfsmp->hfs_mp);
	2080
	2081	if (write_jibp) {
	2082	buf_bwrite(jinfo_bp);
	2083	} else {
	2084	buf_brelse(jinfo_bp);
	2085	}
	2086	jinfo_bp = NULL;
	2087	jibp = NULL;
	2088
	2089	if (hfsmp->jnl && mdbp) {
	2090	// reload the mdb because it could have changed
	2091	// if the journal had to be replayed.
	2092	if (mdb_offset == 0) {
	2093	mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
	2094	}
	2095	bp = NULL;
	2096	retval = (int)buf_meta_bread(devvp,
	2097	HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
	2098	hfsmp->hfs_physical_block_size, cred, &bp);
	2099	if (retval) {
	2100	if (bp) {
	2101	buf_brelse(bp);
	2102	}
	2103	printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
	2104	retval);
	2105	return retval;
	2106	}
	2107	bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
	2108	buf_brelse(bp);
	2109	bp = NULL;
	2110	}
	2111	}
	2112
	2113
	2114	//printf("journal @ 0x%x\n", hfsmp->jnl);
	2115
	2116	// if we expected the journal to be there and we couldn't
	2117	// create it or open it then we have to bail out.
	2118	if (hfsmp->jnl == NULL) {
	2119	printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
	2120	return EINVAL;
	2121	}
	2122
	2123	return 0;
	2124	}
	2125
	2126
	2127	//
	2128	// This function will go and re-locate the .journal_info_block and
	2129	// the .journal files in case they moved (which can happen if you
	2130	// run Norton SpeedDisk). If we fail to find either file we just
	2131	// disable journaling for this volume and return. We turn off the
	2132	// journaling bit in the vcb and assume it will get written to disk
	2133	// later (if it doesn't on the next mount we'd do the same thing
	2134	// again which is harmless). If we disable journaling we don't
	2135	// return an error so that the volume is still mountable.
	2136	//
	2137	// If the info we find for the .journal_info_block and .journal files
	2138	// isn't what we had stored, we re-set our cached info and proceed
	2139	// with opening the journal normally.
	2140	//
	2141	static int
	2142	hfs_late_journal_init(struct hfsmount hfsmp, HFSPlusVolumeHeader vhp, void *_args)
	2143	{
	2144	JournalInfoBlock *jibp;
	2145	struct buf *jinfo_bp;
	2146	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	2147	int retval, write_jibp = 0, recreate_journal = 0;
	2148	struct vnode *devvp;
	2149	struct cat_attr jib_attr, jattr;
	2150	struct cat_fork jib_fork, jfork;
	2151	ExtendedVCB *vcb;
	2152	u_int32_t fid;
	2153	struct hfs_mount_args *args = _args;
	2154	u_int32_t jib_flags;
	2155	u_int64_t jib_offset;
	2156	u_int64_t jib_size;
	2157
	2158	devvp = hfsmp->hfs_devvp;
	2159	vcb = HFSTOVCB(hfsmp);
	2160
	2161	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
	2162	if (args->journal_disable) {
	2163	return 0;
	2164	}
	2165
	2166	arg_flags = args->journal_flags;
	2167	arg_tbufsz = args->journal_tbuffer_size;
	2168	}
	2169
	2170	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
	2171	if (fid == 0 \|\| jib_fork.cf_extents[0].startBlock == 0 \|\| jib_fork.cf_size == 0) {
	2172	printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
	2173	jib_fork.cf_extents[0].startBlock);
	2174	vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
	2175	return 0;
	2176	}
	2177	hfsmp->hfs_jnlinfoblkid = fid;
	2178
	2179	// make sure the journal_info_block begins where we think it should.
	2180	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
	2181	printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
	2182	SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
	2183
	2184	vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
	2185	vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
	2186	recreate_journal = 1;
	2187	}
	2188
	2189
	2190	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
	2191	jinfo_bp = NULL;
	2192	retval = (int)buf_meta_bread(devvp,
	2193	(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
	2194	((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
	2195	hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
	2196	if (retval) {
	2197	if (jinfo_bp) {
	2198	buf_brelse(jinfo_bp);
	2199	}
	2200	printf("hfs: can't read journal info block. disabling journaling.\n");
	2201	vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
	2202	return 0;
	2203	}
	2204
	2205	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	2206	jib_flags = SWAP_BE32(jibp->flags);
	2207	jib_offset = SWAP_BE64(jibp->offset);
	2208	jib_size = SWAP_BE64(jibp->size);
	2209
	2210	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
	2211	if (fid == 0 \|\| jfork.cf_extents[0].startBlock == 0 \|\| jfork.cf_size == 0) {
	2212	printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
	2213	jfork.cf_extents[0].startBlock);
	2214	buf_brelse(jinfo_bp);
	2215	vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
	2216	return 0;
	2217	}
	2218	hfsmp->hfs_jnlfileid = fid;
	2219
	2220	// make sure the journal file begins where we think it should.
	2221	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
	2222	printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
	2223	(jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
	2224
	2225	jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
	2226	write_jibp = 1;
	2227	recreate_journal = 1;
	2228	}
	2229
	2230	// check the size of the journal file.
	2231	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
	2232	printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
	2233	jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
	2234
	2235	jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
	2236	write_jibp = 1;
	2237	recreate_journal = 1;
	2238	}
	2239
	2240	if (jib_flags & kJIJournalInFSMask) {
	2241	hfsmp->jvp = hfsmp->hfs_devvp;
	2242	jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
	2243	} else {
	2244	const char *dev_name;
	2245	int need_init = 0;
	2246
	2247	dev_name = vnode_name(devvp);
	2248	if (dev_name == NULL) {
	2249	dev_name = "unknown-dev";
	2250	}
	2251
	2252	// since the journal is empty, just use any available external journal
	2253	((char )&jibp->ext_jnl_uuid[0]) = '\0';
	2254
	2255	// this fills in the uuid of the device we actually get
	2256	hfsmp->jvp = open_journal_dev(dev_name,
	2257	!(jib_flags & kJIJournalNeedInitMask),
	2258	(char *)&jibp->ext_jnl_uuid[0],
	2259	(char *)&jibp->machine_serial_num[0],
	2260	jib_size,
	2261	hfsmp->hfs_logical_block_size,
	2262	&need_init);
	2263	if (hfsmp->jvp == NULL) {
	2264	buf_brelse(jinfo_bp);
	2265	return EROFS;
	2266	} else {
	2267	if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
	2268	strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
	2269	}
	2270	}
	2271	jib_offset = 0;
	2272	recreate_journal = 1;
	2273	write_jibp = 1;
	2274	if (need_init) {
	2275	jib_flags \|= kJIJournalNeedInitMask;
	2276	}
	2277	}
	2278
	2279	// save this off for the hack-y check in hfs_remove()
	2280	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	2281	hfsmp->jnl_size = jib_size;
	2282
	2283	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
	2284	// if the file system is read-only, check if the journal is empty.
	2285	// if it is, then we can allow the mount. otherwise we have to
	2286	// return failure.
	2287	retval = journal_is_clean(hfsmp->jvp,
	2288	jib_offset,
	2289	jib_size,
	2290	devvp,
	2291	hfsmp->hfs_logical_block_size);
	2292
	2293	hfsmp->jnl = NULL;
	2294
	2295	buf_brelse(jinfo_bp);
	2296
	2297	if (retval) {
	2298	const char *name = vnode_getname(devvp);
	2299	printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
	2300	name ? name : "");
	2301	if (name)
	2302	vnode_putname(name);
	2303	}
	2304
	2305	return retval;
	2306	}
	2307
	2308	if ((jib_flags & kJIJournalNeedInitMask) \|\| recreate_journal) {
	2309	printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
	2310	jib_offset, jib_size);
	2311	hfsmp->jnl = journal_create(hfsmp->jvp,
	2312	jib_offset,
	2313	jib_size,
	2314	devvp,
	2315	hfsmp->hfs_logical_block_size,
	2316	arg_flags,
	2317	arg_tbufsz,
	2318	hfs_sync_metadata, hfsmp->hfs_mp);
	2319
	2320	// no need to start a transaction here... if this were to fail
	2321	// we'd just re-init it on the next mount.
	2322	jib_flags &= ~kJIJournalNeedInitMask;
	2323	write_jibp = 1;
	2324
	2325	} else {
	2326	//
	2327	// if we weren't the last person to mount this volume
	2328	// then we need to throw away the journal because it
	2329	// is likely that someone else mucked with the disk.
	2330	// if the journal is empty this is no big deal. if the
	2331	// disk is dirty this prevents us from replaying the
	2332	// journal over top of changes that someone else made.
	2333	//
	2334	arg_flags \|= JOURNAL_RESET;
	2335
	2336	//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
	2337	// jib_offset,
	2338	// jib_size, SWAP_BE32(vhp->blockSize));
	2339
	2340	hfsmp->jnl = journal_open(hfsmp->jvp,
	2341	jib_offset,
	2342	jib_size,
	2343	devvp,
	2344	hfsmp->hfs_logical_block_size,
	2345	arg_flags,
	2346	arg_tbufsz,
	2347	hfs_sync_metadata, hfsmp->hfs_mp);
	2348	}
	2349
	2350
	2351	if (write_jibp) {
	2352	jibp->flags = SWAP_BE32(jib_flags);
	2353	jibp->offset = SWAP_BE64(jib_offset);
	2354	jibp->size = SWAP_BE64(jib_size);
	2355
	2356	buf_bwrite(jinfo_bp);
	2357	} else {
	2358	buf_brelse(jinfo_bp);
	2359	}
	2360	jinfo_bp = NULL;
	2361	jibp = NULL;
	2362
	2363	//printf("hfs: journal @ 0x%x\n", hfsmp->jnl);
	2364
	2365	// if we expected the journal to be there and we couldn't
	2366	// create it or open it then we have to bail out.
	2367	if (hfsmp->jnl == NULL) {
	2368	printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
	2369	return EINVAL;
	2370	}
	2371
	2372	return 0;
	2373	}
	2374
/*
 * Calculate the allocation zone for metadata.
 *
 * This zone includes the following:
 *	Allocation Bitmap file
 *	Overflow Extents file
 *	Journal file
 *	Quota files
 *	Clustered Hot files
 *	Catalog file
 *
 *                          METADATA ALLOCATION ZONE
 * ____________________________________________________________________________
 * |    |    |     |               |                              |           |
 * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
 * |____|____|_____|_______________|______________________________|___________|
 *
 * <------------------------------- N * 128 MB ------------------------------->
 *
 */
	2395	#define GIGABYTE (u_int64_t)(102410241024)
	2396
	2397	#define OVERFLOW_DEFAULT_SIZE (410241024)
	2398	#define OVERFLOW_MAXIMUM_SIZE (12810241024)
	2399	#define JOURNAL_DEFAULT_SIZE (810241024)
	2400	#define JOURNAL_MAXIMUM_SIZE (51210241024)
	2401	#define HOTBAND_MINIMUM_SIZE (1010241024)
	2402	#define HOTBAND_MAXIMUM_SIZE (51210241024)
	2403
	2404	static void
	2405	hfs_metadatazone_init(struct hfsmount *hfsmp)
	2406	{
	2407	ExtendedVCB *vcb;
	2408	u_int64_t fs_size;
	2409	u_int64_t zonesize;
	2410	u_int64_t temp;
	2411	u_int64_t filesize;
	2412	u_int32_t blk;
	2413	int items, really_do_it=1;
	2414
	2415	vcb = HFSTOVCB(hfsmp);
	2416	fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks;
	2417
	2418	/*
	2419	* For volumes less than 10 GB, don't bother.
	2420	*/
	2421	if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
	2422	really_do_it = 0;
	2423	}
	2424
	2425	/*
	2426	* Skip non-journaled volumes as well.
	2427	*/
	2428	if (hfsmp->jnl == NULL) {
	2429	really_do_it = 0;
	2430	}
	2431
	2432	/*
	2433	* Start with space for the boot blocks and Volume Header.
	2434	* 1536 = byte offset from start of volume to end of volume header:
	2435	* 1024 bytes is the offset from the start of the volume to the
	2436	* start of the volume header (defined by the volume format)
	2437	* + 512 bytes (the size of the volume header).
	2438	*/
	2439	zonesize = roundup(1536, hfsmp->blockSize);
	2440
	2441	/*
	2442	* Add the on-disk size of allocation bitmap.
	2443	*/
	2444	zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
	2445
	2446	/*
	2447	* Add space for the Journal Info Block and Journal (if they're in
	2448	* this file system).
	2449	*/
	2450	if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
	2451	zonesize += hfsmp->blockSize + hfsmp->jnl_size;
	2452	}
	2453
	2454	/*
	2455	* Add the existing size of the Extents Overflow B-tree.
	2456	* (It rarely grows, so don't bother reserving additional room for it.)
	2457	*/
	2458	zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
	2459
	2460	/*
	2461	* If there is an Attributes B-tree, leave room for 11 clumps worth.
	2462	* newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
	2463	* When installing a full OS install onto a 20GB volume, we use
	2464	* 7 to 8 clumps worth of space (depending on packages), so that leaves
	2465	* us with another 3 or 4 clumps worth before we need another extent.
	2466	*/
	2467	if (hfsmp->hfs_attribute_cp) {
	2468	zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
	2469	}
	2470
	2471	/*
	2472	* Leave room for 11 clumps of the Catalog B-tree.
	2473	* Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
	2474	* When installing a full OS install onto a 20GB volume, we use
	2475	* 7 to 8 clumps worth of space (depending on packages), so that leaves
	2476	* us with another 3 or 4 clumps worth before we need another extent.
	2477	*/
	2478	zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
	2479
	2480	/*
	2481	* Add space for hot file region.
	2482	*
	2483	* ...for now, use 5 MB per 1 GB (0.5 %)
	2484	*/
	2485	filesize = (fs_size / 1024) * 5;
	2486	if (filesize > HOTBAND_MAXIMUM_SIZE)
	2487	filesize = HOTBAND_MAXIMUM_SIZE;
	2488	else if (filesize < HOTBAND_MINIMUM_SIZE)
	2489	filesize = HOTBAND_MINIMUM_SIZE;
	2490	/*
	2491	* Calculate user quota file requirements.
	2492	*/
	2493	if (hfsmp->hfs_flags & HFS_QUOTAS) {
	2494	items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
	2495	if (items < QF_MIN_USERS)
	2496	items = QF_MIN_USERS;
	2497	else if (items > QF_MAX_USERS)
	2498	items = QF_MAX_USERS;
	2499	if (!powerof2(items)) {
	2500	int x = items;
	2501	items = 4;
	2502	while (x>>1 != 1) {
	2503	x = x >> 1;
	2504	items = items << 1;
	2505	}
	2506	}
	2507	filesize += (items + 1) * sizeof(struct dqblk);
	2508	/*
	2509	* Calculate group quota file requirements.
	2510	*
	2511	*/
	2512	items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
	2513	if (items < QF_MIN_GROUPS)
	2514	items = QF_MIN_GROUPS;
	2515	else if (items > QF_MAX_GROUPS)
	2516	items = QF_MAX_GROUPS;
	2517	if (!powerof2(items)) {
	2518	int x = items;
	2519	items = 4;
	2520	while (x>>1 != 1) {
	2521	x = x >> 1;
	2522	items = items << 1;
	2523	}
	2524	}
	2525	filesize += (items + 1) * sizeof(struct dqblk);
	2526	}
	2527	zonesize += filesize;
	2528
	2529	/*
	2530	* Round up entire zone to a bitmap block's worth.
	2531	* The extra space goes to the catalog file and hot file area.
	2532	*/
	2533	temp = zonesize;
	2534	zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
	2535	hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
	2536	/*
	2537	* If doing the round up for hfs_min_alloc_start would push us past
	2538	* totalBlocks, then just reset it back to 0. Though using a value
	2539	* bigger than totalBlocks would not cause damage in the block allocator
	2540	* code, this value could get stored in the volume header and make it out
	2541	* to disk, making the volume header technically corrupt.
	2542	*/
	2543	if (hfsmp->hfs_min_alloc_start >= hfsmp->totalBlocks) {
	2544	hfsmp->hfs_min_alloc_start = 0;
	2545	}
	2546
	2547	if (really_do_it == 0) {
	2548	return;
	2549	}
	2550
	2551	temp = zonesize - temp; /* temp has extra space */
	2552	filesize += temp / 3;
	2553	hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
	2554
	2555	hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
	2556
	2557	/* Convert to allocation blocks. */
	2558	blk = zonesize / vcb->blockSize;
	2559
	2560	/* The default metadata zone location is at the start of volume. */
	2561	hfsmp->hfs_metazone_start = 1;
	2562	hfsmp->hfs_metazone_end = blk - 1;
	2563
	2564	/* The default hotfile area is at the end of the zone. */
	2565	hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
	2566	hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
	2567	hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
	2568	#if 0
	2569	printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
	2570	printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
	2571	printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
	2572	#endif
	2573	hfsmp->hfs_flags \|= HFS_METADATA_ZONE;
	2574	}
	2575
	2576
	2577	static u_int32_t
	2578	hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
	2579	{
	2580	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	2581	int lockflags;
	2582	int freeblocks;
	2583
	2584	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	2585	freeblocks = MetaZoneFreeBlocks(vcb);
	2586	hfs_systemfile_unlock(hfsmp, lockflags);
	2587
	2588	/* Minus Extents overflow file reserve. */
	2589	freeblocks -=
	2590	hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
	2591	/* Minus catalog file reserve. */
	2592	freeblocks -=
	2593	hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
	2594	if (freeblocks < 0)
	2595	freeblocks = 0;
	2596
	2597	return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
	2598	}
	2599
	2600	/*
	2601	* Determine if a file is a "virtual" metadata file.
	2602	* This includes journal and quota files.
	2603	*/
	2604	__private_extern__
	2605	int
	2606	hfs_virtualmetafile(struct cnode *cp)
	2607	{
	2608	const char * filename;
	2609
	2610
	2611	if (cp->c_parentcnid != kHFSRootFolderID)
	2612	return (0);
	2613
	2614	filename = (const char *)cp->c_desc.cd_nameptr;
	2615	if (filename == NULL)
	2616	return (0);
	2617
	2618	if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) \|\|
	2619	(strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) \|\|
	2620	(strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) \|\|
	2621	(strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) \|\|
	2622	(strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
	2623	return (1);
	2624
	2625	return (0);
	2626	}
	2627
	2628
	2629	//
	2630	// Fire off a timed callback to sync the disk if the
	2631	// volume is on ejectable media.
	2632	//
	2633	__private_extern__
	2634	void
	2635	hfs_sync_ejectable(struct hfsmount *hfsmp)
	2636	{
	2637	if (hfsmp->hfs_syncer) {
	2638	clock_sec_t secs;
	2639	clock_usec_t usecs;
	2640	uint64_t now;
	2641
	2642	clock_get_calendar_microtime(&secs, &usecs);
	2643	now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
	2644
	2645	if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
	2646	// if we have a sync scheduled but i/o is starting to pile up,
	2647	// don't call thread_call_enter_delayed() again because that
	2648	// will defer the sync.
	2649	return;
	2650	}
	2651
	2652	if (hfsmp->hfs_sync_scheduled == 0) {
	2653	uint64_t deadline;
	2654
	2655	hfsmp->hfs_last_sync_request_time = now;
	2656
	2657	clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
	2658
	2659	/*
	2660	* Increment hfs_sync_scheduled on the assumption that we're the
	2661	* first thread to schedule the timer. If some other thread beat
	2662	* us, then we'll decrement it. If we were the first to
	2663	* schedule the timer, then we need to keep track that the
	2664	* callback is waiting to complete.
	2665	*/
	2666	OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
	2667	if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
	2668	OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
	2669	else
	2670	OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
	2671	}
	2672	}
	2673	}
	2674
	2675
	2676	__private_extern__
	2677	int
	2678	hfs_start_transaction(struct hfsmount *hfsmp)
	2679	{
	2680	int ret, unlock_on_err=0;
	2681	void * thread = current_thread();
	2682
	2683	#ifdef HFS_CHECK_LOCK_ORDER
	2684	/*
	2685	* You cannot start a transaction while holding a system
	2686	* file lock. (unless the transaction is nested.)
	2687	*/
	2688	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
	2689	if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
	2690	panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
	2691	}
	2692	if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
	2693	panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
	2694	}
	2695	if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
	2696	panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
	2697	}
	2698	}
	2699	#endif /* HFS_CHECK_LOCK_ORDER */
	2700
	2701	if (hfsmp->jnl == NULL \|\| journal_owner(hfsmp->jnl) != thread) {
	2702	lck_rw_lock_shared(&hfsmp->hfs_global_lock);
	2703	OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
	2704	unlock_on_err = 1;
	2705	}
	2706
	2707	/* If a downgrade to read-only mount is in progress, no other
	2708	* process than the downgrade process is allowed to modify
	2709	* the file system.
	2710	*/
	2711	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
	2712	(hfsmp->hfs_downgrading_proc != thread)) {
	2713	ret = EROFS;
	2714	goto out;
	2715	}
	2716
	2717	if (hfsmp->jnl) {
	2718	ret = journal_start_transaction(hfsmp->jnl);
	2719	if (ret == 0) {
	2720	OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
	2721	}
	2722	} else {
	2723	ret = 0;
	2724	}
	2725
	2726	out:
	2727	if (ret != 0 && unlock_on_err) {
	2728	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
	2729	OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
	2730	}
	2731
	2732	return ret;
	2733	}
	2734
	2735	__private_extern__
	2736	int
	2737	hfs_end_transaction(struct hfsmount *hfsmp)
	2738	{
	2739	int need_unlock=0, ret;
	2740
	2741	if ( hfsmp->jnl == NULL
	2742	\|\| ( journal_owner(hfsmp->jnl) == current_thread()
	2743	&& (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
	2744
	2745	need_unlock = 1;
	2746	}
	2747
	2748	if (hfsmp->jnl) {
	2749	ret = journal_end_transaction(hfsmp->jnl);
	2750	} else {
	2751	ret = 0;
	2752	}
	2753
	2754	if (need_unlock) {
	2755	OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
	2756	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
	2757	hfs_sync_ejectable(hfsmp);
	2758	}
	2759
	2760	return ret;
	2761	}
	2762
	2763
	2764	__private_extern__
	2765	int
	2766	hfs_journal_flush(struct hfsmount *hfsmp)
	2767	{
	2768	int ret;
	2769
	2770	if (hfsmp->jnl) {
	2771	lck_rw_lock_shared(&hfsmp->hfs_global_lock);
	2772	ret = journal_flush(hfsmp->jnl);
	2773	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
	2774	} else {
	2775	ret = 0;
	2776	}
	2777
	2778	return ret;
	2779	}
	2780
	2781
	2782	/*
	2783	* hfs_erase_unused_nodes
	2784	*
	2785	* Check wheter a volume may suffer from unused Catalog B-tree nodes that
	2786	* are not zeroed (due to <rdar://problem/6947811>). If so, just write
	2787	* zeroes to the unused nodes.
	2788	*
	2789	* How do we detect when a volume needs this repair? We can't always be
	2790	* certain. If a volume was created after a certain date, then it may have
	2791	* been created with the faulty newfs_hfs. Since newfs_hfs only created one
	2792	* clump, we can assume that if a Catalog B-tree is larger than its clump size,
	2793	* that means that the entire first clump must have been written to, which means
	2794	* there shouldn't be unused and unwritten nodes in that first clump, and this
	2795	* repair is not needed.
	2796	*
	2797	* We have defined a bit in the Volume Header's attributes to indicate when the
	2798	* unused nodes have been repaired. A newer newfs_hfs will set this bit.
	2799	* As will fsck_hfs when it repairs the unused nodes.
	2800	*/
	2801	__private_extern__
	2802	int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
	2803	{
	2804	int result;
	2805	struct filefork *catalog;
	2806	int lockflags;
	2807
	2808	if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
	2809	{
	2810	/* This volume has already been checked and repaired. */
	2811	return 0;
	2812	}
	2813
	2814	if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
	2815	{
	2816	/* This volume is too old to have had the problem. */
	2817	hfsmp->vcbAtrb \|= kHFSUnusedNodeFixMask;
	2818	return 0;
	2819	}
	2820
	2821	catalog = hfsmp->hfs_catalog_cp->c_datafork;
	2822	if (catalog->ff_size > catalog->ff_clumpsize)
	2823	{
	2824	/* The entire first clump must have been in use at some point. */
	2825	hfsmp->vcbAtrb \|= kHFSUnusedNodeFixMask;
	2826	return 0;
	2827	}
	2828
	2829	/*
	2830	* If we get here, we need to zero out those unused nodes.
	2831	*
	2832	* We start a transaction and lock the catalog since we're going to be
	2833	* making on-disk changes. But note that BTZeroUnusedNodes doens't actually
	2834	* do its writing via the journal, because that would be too much I/O
	2835	* to fit in a transaction, and it's a pain to break it up into multiple
	2836	* transactions. (It behaves more like growing a B-tree would.)
	2837	*/
	2838	printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
	2839	result = hfs_start_transaction(hfsmp);
	2840	if (result)
	2841	goto done;
	2842	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	2843	result = BTZeroUnusedNodes(catalog);
	2844	vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
	2845	hfs_systemfile_unlock(hfsmp, lockflags);
	2846	hfs_end_transaction(hfsmp);
	2847	if (result == 0)
	2848	hfsmp->vcbAtrb \|= kHFSUnusedNodeFixMask;
	2849	printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
	2850
	2851	done:
	2852	return result;
	2853	}