bsd/hfs/hfs_vfsutils.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*      @(#)hfs_vfsutils.c      4.0
  29 *
  30 *       (c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
  31 *
  32 *       hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
  33 *
  34 */
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/kernel.h>
  38 #include <sys/malloc.h>
  39 #include <sys/stat.h>
  40 #include <sys/mount.h>
  41 #include <sys/mount_internal.h>
  42 #include <sys/buf.h>
  43 #include <sys/buf_internal.h>
  44 #include <sys/ubc.h>
  45 #include <sys/unistd.h>
  46 #include <sys/utfconv.h>
  47 #include <sys/kauth.h>
  48 #include <sys/fcntl.h>
  49 #include <sys/fsctl.h>
  50 #include <sys/vnode_internal.h>
  51 #include <kern/clock.h>
  52 #include <stdbool.h>
  53
  54 #include <libkern/OSAtomic.h>
  55
  56 /* for parsing boot-args */
  57 #include <pexpert/pexpert.h>
  58
  59 #include "hfs.h"
  60 #include "hfs_catalog.h"
  61 #include "hfs_dbg.h"
  62 #include "hfs_mount.h"
  63 #include "hfs_endian.h"
  64 #include "hfs_cnode.h"
  65 #include "hfs_fsctl.h"
  66 #include "hfs_cprotect.h"
  67
  68 #include "hfscommon/headers/FileMgrInternal.h"
  69 #include "hfscommon/headers/BTreesInternal.h"
  70 #include "hfscommon/headers/HFSUnicodeWrappers.h"
  71
  72 /* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
  73 extern int hfs_resize_debug;
  74
  75 static void ReleaseMetaFileVNode(struct vnode *vp);
  76 static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
  77
  78 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
  79 static void hfs_thaw_locked(struct hfsmount *hfsmp);
  80
   /*
    * Gates the diagnostic printf() calls in the mount routines of this file;
    * they are written as "if (HFS_MOUNT_DEBUG) { printf(...); }".
    */
   81 #define HFS_MOUNT_DEBUG 1
  82
  83
  84 //*******************************************************************************
   85 // Note: Finder information in the HFS/HFS+ metadata is considered opaque and
   86 //       hence is not in the right byte order on little endian machines. It is
   87 //       the responsibility of the Finder and other clients to swap the data.
  88 //*******************************************************************************
  89
  90 //*******************************************************************************
  91 //      Routine:        hfs_MountHFSVolume
  92 //
  93 //
  94 //*******************************************************************************
   /*
    * Display names for the HFS metadata files.  In the mount routines of this
    * file, hfs_extname, hfs_catname and hfs_vbmname are assigned to cd_nameptr
    * of the catalog descriptor used to create the matching system-file vnode.
    * NOTE(review): hfs_attrname and hfs_startupname are presumably used the
    * same way for the attribute B-tree and the startup file -- confirm against
    * their users.
    */
   95 unsigned char hfs_catname[] = "Catalog B-tree";
   96 unsigned char hfs_extname[] = "Extents B-tree";
   97 unsigned char hfs_vbmname[] = "Volume Bitmap";
   98 unsigned char hfs_attrname[] = "Attribute B-tree";
   99 unsigned char hfs_startupname[] = "Startup File";
 100
 101 #if CONFIG_HFS_STD
 102 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 103                 __unused struct proc *p)
 104 {
 105         ExtendedVCB *vcb = HFSTOVCB(hfsmp);
 106         int error;
 107         ByteCount utf8chars;
 108         struct cat_desc cndesc;
 109         struct cat_attr cnattr;
 110         struct cat_fork fork;
 111         int newvnode_flags = 0;
 112
 113         /* Block size must be a multiple of 512 */
 114         if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
 115             (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
 116                 return (EINVAL);
 117
 118         /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
 119         if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
 120             ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
 121                 return (EINVAL);
 122         }
 123         hfsmp->hfs_flags |= HFS_STANDARD;
 124         /*
 125          * The MDB seems OK: transfer info from it into VCB
 126          * Note - the VCB starts out clear (all zeros)
 127          *
 128          */
 129         vcb->vcbSigWord         = SWAP_BE16 (mdb->drSigWord);
 130         vcb->hfs_itime          = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
 131         vcb->localCreateDate    = SWAP_BE32 (mdb->drCrDate);
 132         vcb->vcbLsMod           = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
 133         vcb->vcbAtrb            = SWAP_BE16 (mdb->drAtrb);
 134         vcb->vcbNmFls           = SWAP_BE16 (mdb->drNmFls);
 135         vcb->vcbVBMSt           = SWAP_BE16 (mdb->drVBMSt);
 136         vcb->nextAllocation     = SWAP_BE16 (mdb->drAllocPtr);
 137         vcb->totalBlocks        = SWAP_BE16 (mdb->drNmAlBlks);
 138         vcb->allocLimit         = vcb->totalBlocks;
 139         vcb->blockSize          = SWAP_BE32 (mdb->drAlBlkSiz);
 140         vcb->vcbClpSiz          = SWAP_BE32 (mdb->drClpSiz);
 141         vcb->vcbAlBlSt          = SWAP_BE16 (mdb->drAlBlSt);
 142         vcb->vcbNxtCNID         = SWAP_BE32 (mdb->drNxtCNID);
 143         vcb->freeBlocks         = SWAP_BE16 (mdb->drFreeBks);
 144         vcb->vcbVolBkUp         = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
 145         vcb->vcbWrCnt           = SWAP_BE32 (mdb->drWrCnt);
 146         vcb->vcbNmRtDirs        = SWAP_BE16 (mdb->drNmRtDirs);
 147         vcb->vcbFilCnt          = SWAP_BE32 (mdb->drFilCnt);
 148         vcb->vcbDirCnt          = SWAP_BE32 (mdb->drDirCnt);
 149         bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
 150         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 151                 vcb->vcbWrCnt++;        /* Compensate for write of MDB on last flush */
 152
 153         /* convert hfs encoded name into UTF-8 string */
 154         error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 155         /*
 156          * When an HFS name cannot be encoded with the current
 157          * volume encoding we use MacRoman as a fallback.
 158          */
 159         if (error || (utf8chars == 0)) {
 160                 error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 161                 /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
 162                 if (error) {
 163                         goto MtVolErr;
 164                 }
 165         }
 166
 167         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 168         vcb->vcbVBMIOSize = kHFSBlockSize;
 169
 170         /* Generate the partition-based AVH location */
 171         hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 172                                                   hfsmp->hfs_logical_block_count);
 173
 174         /* HFS standard is read-only, so just stuff the FS location in here, too */
 175         hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
 176
 177         bzero(&cndesc, sizeof(cndesc));
 178         cndesc.cd_parentcnid = kHFSRootParentID;
 179         cndesc.cd_flags |= CD_ISMETA;
 180         bzero(&cnattr, sizeof(cnattr));
 181         cnattr.ca_linkcount = 1;
 182         cnattr.ca_mode = S_IFREG;
 183         bzero(&fork, sizeof(fork));
 184
 185         /*
 186          * Set up Extents B-tree vnode
 187          */
 188         cndesc.cd_nameptr = hfs_extname;
 189         cndesc.cd_namelen = strlen((char *)hfs_extname);
 190         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 191         fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
 192         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 193         fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
 194         fork.cf_vblocks = 0;
 195         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
 196         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
 197         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
 198         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
 199         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
 200         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
 201         cnattr.ca_blocks = fork.cf_blocks;
 202
 203         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 204                                 &hfsmp->hfs_extents_vp, &newvnode_flags);
 205         if (error) {
 206                 if (HFS_MOUNT_DEBUG) {
 207                         printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
 208                 }
 209                 goto MtVolErr;
 210         }
 211         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 212                                          (KeyCompareProcPtr)CompareExtentKeys));
 213         if (error) {
 214                 if (HFS_MOUNT_DEBUG) {
 215                         printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
 216                 }
 217                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 218                 goto MtVolErr;
 219         }
 220         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 221
 222         /*
 223          * Set up Catalog B-tree vnode...
 224          */
 225         cndesc.cd_nameptr = hfs_catname;
 226         cndesc.cd_namelen = strlen((char *)hfs_catname);
 227         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 228         fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
 229         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 230         fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
 231         fork.cf_vblocks = 0;
 232         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
 233         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
 234         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
 235         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
 236         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
 237         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
 238         cnattr.ca_blocks = fork.cf_blocks;
 239
 240         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 241                                 &hfsmp->hfs_catalog_vp, &newvnode_flags);
 242         if (error) {
 243                 if (HFS_MOUNT_DEBUG) {
 244                         printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
 245                 }
 246                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 247                 goto MtVolErr;
 248         }
 249         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 250                                          (KeyCompareProcPtr)CompareCatalogKeys));
 251         if (error) {
 252                 if (HFS_MOUNT_DEBUG) {
 253                         printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
 254                 }
 255                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 256                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 257                 goto MtVolErr;
 258         }
 259         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 260
 261         /*
 262          * Set up dummy Allocation file vnode (used only for locking bitmap)
 263          */
 264         cndesc.cd_nameptr = hfs_vbmname;
 265         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 266         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 267         bzero(&fork, sizeof(fork));
 268         cnattr.ca_blocks = 0;
 269
 270         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 271                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 272         if (error) {
 273                 if (HFS_MOUNT_DEBUG) {
 274                         printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
 275                 }
 276                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 277                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 278                 goto MtVolErr;
 279         }
 280         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 281
 282         /* mark the volume dirty (clear clean unmount bit) */
 283         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 284
 285     if (error == noErr) {
 286                 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
 287                 if (HFS_MOUNT_DEBUG) {
 288                         printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
 289                 }
 290         }
 291
 292     if (error == noErr) {
 293                 /* If the disk isn't write protected.. */
 294         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
 295             MarkVCBDirty (vcb); //      mark VCB dirty so it will be written
 296                 }
 297         }
 298
 299         /*
 300          * all done with system files so we can unlock now...
 301          */
 302         hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
 303         hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 304         hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 305
 306         if (error == noErr) {
 307                 /* If successful, then we can just return once we've unlocked the cnodes */
 308                 return error;
 309         }
 310
 311     //--        Release any resources allocated so far before exiting with an error:
 312 MtVolErr:
 313         hfsUnmount(hfsmp, NULL);
 314
 315     return (error);
 316 }
 317
 318 #endif
 319
 320 //*******************************************************************************
 321 //
 322 // Sanity check Volume Header Block:
 323 //              Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has
 324 //              not been endian-swapped and represents the on-disk contents of this sector.
 325 //              This routine will not change the endianness of vhp block.
 326 //
 327 //*******************************************************************************
 328 OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
 329 {
 330         u_int16_t signature;
 331         u_int16_t hfs_version;
 332         u_int32_t blockSize;
 333
 334         signature = SWAP_BE16(vhp->signature);
 335         hfs_version = SWAP_BE16(vhp->version);
 336
 337         if (signature == kHFSPlusSigWord) {
 338                 if (hfs_version != kHFSPlusVersion) {
 339                         printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
 340                         return (EINVAL);
 341                 }
 342         } else if (signature == kHFSXSigWord) {
 343                 if (hfs_version != kHFSXVersion) {
 344                         printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
 345                         return (EINVAL);
 346                 }
 347         } else {
 348                 /* Removed printf for invalid HFS+ signature because it gives
 349                  * false error for UFS root volume
 350                  */
 351                 if (HFS_MOUNT_DEBUG) {
 352                         printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
 353                 }
 354                 return (EINVAL);
 355         }
 356
 357         /* Block size must be at least 512 and a power of 2 */
 358         blockSize = SWAP_BE32(vhp->blockSize);
 359         if (blockSize < 512 || !powerof2(blockSize)) {
 360                 if (HFS_MOUNT_DEBUG) {
 361                         printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
 362                 }
 363                 return (EINVAL);
 364         }
 365
 366         if (blockSize < hfsmp->hfs_logical_block_size) {
 367                 if (HFS_MOUNT_DEBUG) {
 368                         printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
 369                                         blockSize, hfsmp->hfs_logical_block_size);
 370                 }
 371                 return (EINVAL);
 372         }
 373         return 0;
 374 }
 375
 376 //*******************************************************************************
 377 //      Routine:        hfs_MountHFSPlusVolume
 378 //
 379 //
 380 //*******************************************************************************
 381
 382 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 383         off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
 384 {
 385         register ExtendedVCB *vcb;
 386         struct cat_desc cndesc;
 387         struct cat_attr cnattr;
 388         struct cat_fork cfork;
 389         u_int32_t blockSize;
 390         daddr64_t spare_sectors;
 391         struct BTreeInfoRec btinfo;
 392         u_int16_t  signature;
 393         u_int16_t  hfs_version;
 394         int newvnode_flags = 0;
 395         int  i;
 396         OSErr retval;
 397         char converted_volname[256];
 398         size_t volname_length = 0;
 399         size_t conv_volname_length = 0;
 400
 401         signature = SWAP_BE16(vhp->signature);
 402         hfs_version = SWAP_BE16(vhp->version);
 403
 404         retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
 405         if (retval)
 406                 return retval;
 407
 408         if (signature == kHFSXSigWord) {
 409                 /* The in-memory signature is always 'H+'. */
 410                 signature = kHFSPlusSigWord;
 411                 hfsmp->hfs_flags |= HFS_X;
 412         }
 413
 414         blockSize = SWAP_BE32(vhp->blockSize);
 415         /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
 416         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
 417             (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
 418                 if (HFS_MOUNT_DEBUG) {
 419                         printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
 420                 }
 421                 return (EINVAL);
 422         }
 423
 424         /* Make sure we can live with the physical block size. */
 425         if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
 426             (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
 427                 if (HFS_MOUNT_DEBUG) {
 428                         printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
 429                                         hfsmp->hfs_logical_block_size);
 430                 }
 431                 return (ENXIO);
 432         }
 433
 434         /*
 435          * If allocation block size is less than the physical block size,
 436          * same data could be cached in two places and leads to corruption.
 437          *
 438          * HFS Plus reserves one allocation block for the Volume Header.
 439          * If the physical size is larger, then when we read the volume header,
 440          * we will also end up reading in the next allocation block(s).
 441          * If those other allocation block(s) is/are modified, and then the volume
 442          * header is modified, the write of the volume header's buffer will write
 443          * out the old contents of the other allocation blocks.
 444          *
 445          * We assume that the physical block size is same as logical block size.
 446          * The physical block size value is used to round down the offsets for
 447          * reading and writing the primary and alternate volume headers.
 448          *
 449          * The same logic to ensure good hfs_physical_block_size is also in
 450          * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
 451          * later are doing the I/Os using same block size.
 452          */
 453         if (blockSize < hfsmp->hfs_physical_block_size) {
 454                 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
 455                 hfsmp->hfs_log_per_phys = 1;
 456         }
 457
 458         /*
 459          * The VolumeHeader seems OK: transfer info from it into VCB
 460          * Note - the VCB starts out clear (all zeros)
 461          */
 462         vcb = HFSTOVCB(hfsmp);
 463
 464         vcb->vcbSigWord = signature;
 465         vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
 466         vcb->vcbLsMod   = to_bsd_time(SWAP_BE32(vhp->modifyDate));
 467         vcb->vcbAtrb    = SWAP_BE32(vhp->attributes);
 468         vcb->vcbClpSiz  = SWAP_BE32(vhp->rsrcClumpSize);
 469         vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
 470         vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
 471         vcb->vcbWrCnt   = SWAP_BE32(vhp->writeCount);
 472         vcb->vcbFilCnt  = SWAP_BE32(vhp->fileCount);
 473         vcb->vcbDirCnt  = SWAP_BE32(vhp->folderCount);
 474
 475         /* copy 32 bytes of Finder info */
 476         bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
 477
 478         vcb->vcbAlBlSt = 0;             /* hfs+ allocation blocks start at first block of volume */
 479         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 480                 vcb->vcbWrCnt++;        /* compensate for write of Volume Header on last flush */
 481
 482         /* Now fill in the Extended VCB info */
 483         vcb->nextAllocation     = SWAP_BE32(vhp->nextAllocation);
 484         vcb->totalBlocks        = SWAP_BE32(vhp->totalBlocks);
 485         vcb->allocLimit         = vcb->totalBlocks;
 486         vcb->freeBlocks         = SWAP_BE32(vhp->freeBlocks);
 487         vcb->blockSize          = blockSize;
 488         vcb->encodingsBitmap    = SWAP_BE64(vhp->encodingsBitmap);
 489         vcb->localCreateDate    = SWAP_BE32(vhp->createDate);
 490
 491         vcb->hfsPlusIOPosOffset = embeddedOffset;
 492
 493         /* Default to no free block reserve */
 494         vcb->reserveBlocks = 0;
 495
 496         /*
 497          * Update the logical block size in the mount struct
 498          * (currently set up from the wrapper MDB) using the
 499          * new blocksize value:
 500          */
 501         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 502         vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
 503
 504         /*
 505          * Validate and initialize the location of the alternate volume header.
 506          *
 507          * Note that there may be spare sectors beyond the end of the filesystem that still
 508          * belong to our partition.
 509          */
 510
 511         spare_sectors = hfsmp->hfs_logical_block_count -
 512                         (((daddr64_t)vcb->totalBlocks * blockSize) /
 513                            hfsmp->hfs_logical_block_size);
 514
 515         /*
 516          * Differentiate between "innocuous" spare sectors and the more unusual
 517          * degenerate case:
 518          *
 519          * *** Innocuous spare sectors exist if:
 520          *
 521          * A) the number of bytes assigned to the partition (by multiplying logical
 522          * block size * logical block count) is greater than the filesystem size
 523          * (by multiplying allocation block count and allocation block size)
 524          *
 525          * and
 526          *
 527          * B) the remainder is less than the size of a full allocation block's worth of bytes.
 528          *
 529          * This handles the normal case where there may be a few extra sectors, but the two
 530          * are fundamentally in sync.
 531          *
 532          * *** Degenerate spare sectors exist if:
 533          * A) The number of bytes assigned to the partition (by multiplying logical
 534          * block size * logical block count) is greater than the filesystem size
 535          * (by multiplying allocation block count and block size).
 536          *
 537          * and
 538          *
 539          * B) the remainder is greater than a full allocation's block worth of bytes.
 540          * In this case,  a smaller file system exists in a larger partition.
 541          * This can happen in various ways, including when volume is resized but the
 542          * partition is yet to be resized.  Under this condition, we have to assume that
 543          * a partition management software may resize the partition to match
 544          * the file system size in the future.  Therefore we should update
 545          * alternate volume header at two locations on the disk,
 546          *   a. 1024 bytes before end of the partition
 547          *   b. 1024 bytes before end of the file system
 548          */
 549
 550         if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
 551                 /*
 552                  * Handle the degenerate case above. FS < partition size.
 553                  * AVH located at 1024 bytes from the end of the partition
 554                  */
 555                 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
 556                                            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
 557
 558                 /* AVH located at 1024 bytes from the end of the filesystem */
 559                 hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
 560                                            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 561                                                 (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
 562         }
 563         else {
 564                 /* Innocuous spare sectors; Partition & FS notion are in sync */
 565                 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
 566                                            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
 567
 568                 hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
 569         }
 570         if (hfs_resize_debug) {
 571                 printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
 572                                 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
 573         }
 574
 575         bzero(&cndesc, sizeof(cndesc));
 576         cndesc.cd_parentcnid = kHFSRootParentID;
 577         cndesc.cd_flags |= CD_ISMETA;
 578         bzero(&cnattr, sizeof(cnattr));
 579         cnattr.ca_linkcount = 1;
 580         cnattr.ca_mode = S_IFREG;
 581
 582         /*
 583          * Set up Extents B-tree vnode
 584          */
 585         cndesc.cd_nameptr = hfs_extname;
 586         cndesc.cd_namelen = strlen((char *)hfs_extname);
 587         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 588
 589         cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
 590         cfork.cf_new_size= 0;
 591         cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
 592         cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
 593         cfork.cf_vblocks = 0;
 594         cnattr.ca_blocks = cfork.cf_blocks;
 595         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 596                 cfork.cf_extents[i].startBlock =
 597                                 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
 598                 cfork.cf_extents[i].blockCount =
 599                                 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
 600         }
 601         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 602                                  &hfsmp->hfs_extents_vp, &newvnode_flags);
 603         if (retval)
 604         {
 605                 if (HFS_MOUNT_DEBUG) {
 606                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
 607                 }
 608                 goto ErrorExit;
 609         }
 610
 611         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 612         hfs_unlock(hfsmp->hfs_extents_cp);
 613
 614         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 615                                           (KeyCompareProcPtr) CompareExtentKeysPlus));
 616         if (retval)
 617         {
 618                 if (HFS_MOUNT_DEBUG) {
 619                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
 620                 }
 621                 goto ErrorExit;
 622         }
 623         /*
 624          * Set up Catalog B-tree vnode
 625          */
 626         cndesc.cd_nameptr = hfs_catname;
 627         cndesc.cd_namelen = strlen((char *)hfs_catname);
 628         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 629
 630         cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
 631         cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
 632         cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
 633         cfork.cf_vblocks = 0;
 634         cnattr.ca_blocks = cfork.cf_blocks;
 635         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 636                 cfork.cf_extents[i].startBlock =
 637                                 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
 638                 cfork.cf_extents[i].blockCount =
 639                                 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
 640         }
 641         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 642                                  &hfsmp->hfs_catalog_vp, &newvnode_flags);
 643         if (retval) {
 644                 if (HFS_MOUNT_DEBUG) {
 645                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
 646                 }
 647                 goto ErrorExit;
 648         }
 649         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 650         hfs_unlock(hfsmp->hfs_catalog_cp);
 651
 652         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 653                                           (KeyCompareProcPtr) CompareExtendedCatalogKeys));
 654         if (retval) {
 655                 if (HFS_MOUNT_DEBUG) {
 656                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
 657                 }
 658                 goto ErrorExit;
 659         }
 660         if ((hfsmp->hfs_flags & HFS_X) &&
 661             BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
 662                 if (btinfo.keyCompareType == kHFSBinaryCompare) {
 663                         hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
 664                         /* Install a case-sensitive key compare */
 665                         (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 666                                           (KeyCompareProcPtr)cat_binarykeycompare);
 667                 }
 668         }
 669
 670         /*
 671          * Set up Allocation file vnode
 672          */
 673         cndesc.cd_nameptr = hfs_vbmname;
 674         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 675         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 676
 677         cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
 678         cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
 679         cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
 680         cfork.cf_vblocks = 0;
 681         cnattr.ca_blocks = cfork.cf_blocks;
 682         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 683                 cfork.cf_extents[i].startBlock =
 684                                 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
 685                 cfork.cf_extents[i].blockCount =
 686                                 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
 687         }
 688         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 689                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 690         if (retval) {
 691                 if (HFS_MOUNT_DEBUG) {
 692                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
 693                 }
 694                 goto ErrorExit;
 695         }
 696         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 697         hfs_unlock(hfsmp->hfs_allocation_cp);
 698
 699         /*
 700          * Set up Attribute B-tree vnode
 701          */
 702         if (vhp->attributesFile.totalBlocks != 0) {
 703                 cndesc.cd_nameptr = hfs_attrname;
 704                 cndesc.cd_namelen = strlen((char *)hfs_attrname);
 705                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
 706
 707                 cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
 708                 cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
 709                 cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
 710                 cfork.cf_vblocks = 0;
 711                 cnattr.ca_blocks = cfork.cf_blocks;
 712                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 713                         cfork.cf_extents[i].startBlock =
 714                                         SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
 715                         cfork.cf_extents[i].blockCount =
 716                                         SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
 717                 }
 718                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 719                                          &hfsmp->hfs_attribute_vp, &newvnode_flags);
 720                 if (retval) {
 721                         if (HFS_MOUNT_DEBUG) {
 722                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
 723                         }
 724                         goto ErrorExit;
 725                 }
 726                 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
 727                 hfs_unlock(hfsmp->hfs_attribute_cp);
 728                 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
 729                                                   (KeyCompareProcPtr) hfs_attrkeycompare));
 730                 if (retval) {
 731                         if (HFS_MOUNT_DEBUG) {
 732                                 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
 733                         }
 734                         goto ErrorExit;
 735                 }
 736
 737                 /* Initialize vnode for virtual attribute data file that spans the
 738                  * entire file system space for performing I/O to attribute btree
 739                  * We hold iocount on the attrdata vnode for the entire duration
 740                  * of mount (similar to btree vnodes)
 741                  */
 742                 retval = init_attrdata_vnode(hfsmp);
 743                 if (retval) {
 744                         if (HFS_MOUNT_DEBUG) {
 745                                 printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
 746                         }
 747                         goto ErrorExit;
 748                 }
 749         }
 750
 751         /*
 752          * Set up Startup file vnode
 753          */
 754         if (vhp->startupFile.totalBlocks != 0) {
 755                 cndesc.cd_nameptr = hfs_startupname;
 756                 cndesc.cd_namelen = strlen((char *)hfs_startupname);
 757                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
 758
 759                 cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
 760                 cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
 761                 cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
 762                 cfork.cf_vblocks = 0;
 763                 cnattr.ca_blocks = cfork.cf_blocks;
 764                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 765                         cfork.cf_extents[i].startBlock =
 766                                         SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
 767                         cfork.cf_extents[i].blockCount =
 768                                         SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
 769                 }
 770                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 771                                          &hfsmp->hfs_startup_vp, &newvnode_flags);
 772                 if (retval) {
 773                         if (HFS_MOUNT_DEBUG) {
 774                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
 775                         }
 776                         goto ErrorExit;
 777                 }
 778                 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
 779                 hfs_unlock(hfsmp->hfs_startup_cp);
 780         }
 781
 782         /*
 783          * Pick up volume name and create date
 784          *
 785          * Acquiring the volume name should not manipulate the bitmap, only the catalog
 786          * btree and possibly the extents overflow b-tree.
 787          */
 788         retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
 789         if (retval) {
 790                 if (HFS_MOUNT_DEBUG) {
 791                         printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
 792                 }
 793                 goto ErrorExit;
 794         }
 795         vcb->hfs_itime = cnattr.ca_itime;
 796         vcb->volumeNameEncodingHint = cndesc.cd_encoding;
 797         bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
 798         volname_length = strlen ((const char*)vcb->vcbVN);
 799         cat_releasedesc(&cndesc);
 800
 801         /* Send the volume name down to CoreStorage if necessary */
 802         retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
 803         if (retval == 0) {
 804                 (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
 805         }
 806
 807         /* reset retval == 0. we don't care about errors in volname conversion */
 808         retval = 0;
 809
 810
 811         /*
 812          * We now always initiate a full bitmap scan even if the volume is read-only because this is
 813          * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
 814          * expects. TRIMs will not be delivered to the underlying media if the volume is not
 815          * read-write though.
 816          */
 817         thread_t allocator_scanner;
 818         hfsmp->scan_var = 0;
 819
 820         /* Take the HFS mount mutex and wait on scan_var */
 821         hfs_lock_mount (hfsmp);
 822
 823         kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
 824         /* Wait until it registers that it's got the appropriate locks (or that it is finished) */
 825         while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
 826                 msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0);
 827         }
 828
 829         hfs_unlock_mount(hfsmp);
 830
 831         thread_deallocate (allocator_scanner);
 832
 833         /* mark the volume dirty (clear clean unmount bit) */
 834         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 835         if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
 836                 hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
 837         }
 838
 839         /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
 840         if ((hfsmp->hfs_flags & HFS_X) != 0) {
 841                 hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
 842         }
 843
 844         //
 845         // Check if we need to do late journal initialization.  This only
 846         // happens if a previous version of MacOS X (or 9) touched the disk.
 847         // In that case hfs_late_journal_init() will go re-locate the journal
 848         // and journal_info_block files and validate that they're still kosher.
 849         //
 850         if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
 851                 && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
 852                 && (hfsmp->jnl == NULL)) {
 853
 854                 retval = hfs_late_journal_init(hfsmp, vhp, args);
 855                 if (retval != 0) {
 856                         if (retval == EROFS) {
 857                                 // EROFS is a special error code that means the volume has an external
 858                                 // journal which we couldn't find.  in that case we do not want to
 859                                 // rewrite the volume header - we'll just refuse to mount the volume.
 860                                 if (HFS_MOUNT_DEBUG) {
 861                                         printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
 862                                 }
 863                                 retval = EINVAL;
 864                                 goto ErrorExit;
 865                         }
 866
 867                         hfsmp->jnl = NULL;
 868
 869                         // if the journal failed to open, then set the lastMountedVersion
 870                         // to be "FSK!" which fsck_hfs will see and force the fsck instead
 871                         // of just bailing out because the volume is journaled.
 872                         if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
 873                                 HFSPlusVolumeHeader *jvhp;
 874                                 daddr64_t mdb_offset;
 875                                 struct buf *bp = NULL;
 876
 877                                 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
 878
 879                                 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
 880
 881                                 bp = NULL;
 882                                 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
 883                                                 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
 884                                                 hfsmp->hfs_physical_block_size, cred, &bp);
 885                                 if (retval == 0) {
 886                                         jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 887
 888                                         if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
 889                                                 printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
 890                                                 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
 891                                                 buf_bwrite(bp);
 892                                         } else {
 893                                                 buf_brelse(bp);
 894                                         }
 895                                         bp = NULL;
 896                                 } else if (bp) {
 897                                         buf_brelse(bp);
 898                                         // clear this so the error exit path won't try to use it
 899                                         bp = NULL;
 900                             }
 901                         }
 902
 903                         if (HFS_MOUNT_DEBUG) {
 904                                 printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
 905                         }
 906                         retval = EINVAL;
 907                         goto ErrorExit;
 908                 } else if (hfsmp->jnl) {
 909                         vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 910                 }
 911         } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
 912                 struct cat_attr jinfo_attr, jnl_attr;
 913
 914                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 915                     vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
 916                 }
 917
 918                 // if we're here we need to fill in the fileid's for the
 919                 // journal and journal_info_block.
 920                 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
 921                 hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
 922                 if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
 923                         printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
 924                         printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
 925                 }
 926
 927                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 928                     vcb->vcbAtrb |= kHFSVolumeJournaledMask;
 929                 }
 930
 931                 if (hfsmp->jnl == NULL) {
 932                     vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 933                 }
 934         }
 935
 936         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
 937         {
 938                 MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
 939         }
 940
 941         if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
 942                 hfs_pin_fs_metadata(hfsmp);
 943         }
 944         /*
 945          * Distinguish 3 potential cases involving content protection:
 946          * 1. mount point bit set; vcbAtrb does not support it. Fail.
 947          * 2. mount point bit set; vcbattrb supports it. we're good.
 948          * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good.
 949          */
 950         if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
 951                 /* Does the mount point support it ? */
 952                 if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
 953                         /* Case 1 above */
 954                         retval = EINVAL;
 955                         goto ErrorExit;
 956                 }
 957         }
 958         else {
 959                 /* not requested in the mount point. Is it in FS? */
 960                 if (vcb->vcbAtrb & kHFSContentProtectionMask) {
 961                         /* Case 3 above */
 962                         vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
 963                 }
 964         }
 965
 966         /* At this point, if the mount point flag is set, we can enable it. */
 967         if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
 968                 /* Cases 2+3 above */
 969 #if CONFIG_PROTECT
 970                 /* Get the EAs as needed. */
 971                 int cperr = 0;
 972                 struct cp_root_xattr *xattr = NULL;
 973                 MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
 974
 975                 /* go get the EA to get the version information */
 976                 cperr = cp_getrootxattr (hfsmp, xattr);
 977                 /*
 978                  * If there was no EA there, then write one out.
 979                  * Assuming EA is not present on the root means
 980                  * this is an erase install or a very old FS
 981                  */
 982
 983                 if (cperr == 0) {
 984                         /* Have to run a valid CP version. */
 985                         if (!cp_is_supported_version(xattr->major_version)) {
 986                                 cperr = EINVAL;
 987                         }
 988                 }
 989                 else if (cperr == ENOATTR) {
 990                         printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
 991                         bzero(xattr, sizeof(struct cp_root_xattr));
 992                         xattr->major_version = CP_CURRENT_VERS;
 993                         xattr->minor_version = CP_MINOR_VERS;
 994                         cperr = cp_setrootxattr (hfsmp, xattr);
 995                 }
 996
 997                 if (cperr) {
 998                         FREE(xattr, M_TEMP);
 999                         retval = EPERM;
1000                         goto ErrorExit;
1001                 }
1002
1003                 /* If we got here, then the CP version is valid. Set it in the mount point */
1004                 hfsmp->hfs_running_cp_major_vers = xattr->major_version;
1005                 printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
1006                 hfsmp->cproot_flags = xattr->flags;
1007                 hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
1008
1009                 FREE(xattr, M_TEMP);
1010
1011                 /*
1012                  * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
1013                  * Ensure that the boot-arg's value is valid for FILES (not directories),
1014                  * since only files are actually protected for now.
1015                  */
1016
1017                 PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
1018
1019                 if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
1020                         PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
1021                 }
1022
1023 #if HFS_TMPDBG
1024 #if !SECURE_KERNEL
1025                 PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
1026 #endif
1027 #endif
1028
1029                 if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
1030                         hfsmp->default_cp_class = PROTECTION_CLASS_C;
1031                 }
1032
1033 #else
1034                 /* If CONFIG_PROTECT not built, ignore CP */
1035                 vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
1036 #endif
1037         }
1038
1039         /*
1040          * Establish a metadata allocation zone.
1041          */
1042         hfs_metadatazone_init(hfsmp, false);
1043
1044         /*
1045          * Make any metadata zone adjustments.
1046          */
1047         if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
1048                 /* Keep the roving allocator out of the metadata zone. */
1049                 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
1050                     vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
1051                         HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1052                 }
1053         } else {
1054                 if (vcb->nextAllocation <= 1) {
1055                         vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
1056                 }
1057         }
1058         vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
1059
1060         /* Setup private/hidden directories for hardlinks. */
1061         hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
1062         hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
1063
1064         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1065                 hfs_remove_orphans(hfsmp);
1066
1067         /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
1068         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1069         {
1070                 retval = hfs_erase_unused_nodes(hfsmp);
1071                 if (retval) {
1072                         if (HFS_MOUNT_DEBUG) {
1073                                 printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
1074                         }
1075
1076                         goto ErrorExit;
1077                 }
1078         }
1079
1080         /*
1081          * Allow hot file clustering if conditions allow.
1082          */
1083         if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
1084             ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
1085                 //
1086                 // Wait until the bitmap scan completes before we initializes the
1087                 // hotfile area so that we do not run into any issues with the
1088                 // bitmap being read while hotfiles is initializing itself.  On
1089                 // some older/slower machines, without this interlock, the bitmap
1090                 // would sometimes get corrupted at boot time.
1091                 //
1092                 hfs_lock_mount(hfsmp);
1093                 while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
1094                         (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
1095                 }
1096                 hfs_unlock_mount(hfsmp);
1097
1098                 /*
1099                  * Note: at this point we are not allowed to fail the
1100                  *       mount operation because the HotFile init code
1101                  *       in hfs_recording_init() will lookup vnodes with
1102                  *       VNOP_LOOKUP() which hangs vnodes off the mount
1103                  *       (and if we were to fail, VFS is not prepared to
1104                  *       clean that up at this point.  Since HotFiles are
1105                  *       optional, this is not a big deal.
1106                  */
1107                 (void) hfs_recording_init(hfsmp);
1108         }
1109
1110         /* Force ACLs on HFS+ file systems. */
1111         vfs_setextendedsecurity(HFSTOVFS(hfsmp));
1112
1113         /* Enable extent-based extended attributes by default */
1114         hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
1115
1116         return (0);
1117
1118 ErrorExit:
1119         /*
1120          * A fatal error occurred and the volume cannot be mounted, so
1121          * release any resources that we acquired...
1122          */
1123         hfsUnmount(hfsmp, NULL);
1124
1125         if (HFS_MOUNT_DEBUG) {
1126                 printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
1127         }
1128         return (retval);
1129 }
1130
1131 static int
1132 _pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
1133 {
1134         int err;
1135
1136         err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
1137         if (err == 0) {
1138                 err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL, vfs_context_kernel());
1139                 hfs_unlock(VTOC(vp));
1140         }
1141
1142         return err;
1143 }
1144
1145 void
1146 hfs_pin_fs_metadata(struct hfsmount *hfsmp)
1147 {
1148         ExtendedVCB *vcb;
1149         int err;
1150
1151         vcb = HFSTOVCB(hfsmp);
1152
1153         err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
1154         if (err != 0) {
1155                 printf("hfs: failed to pin extents overflow file %d\n", err);
1156         }
1157         err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
1158         if (err != 0) {
1159                 printf("hfs: failed to pin catalog file %d\n", err);
1160         }
1161         err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
1162         if (err != 0) {
1163                 printf("hfs: failed to pin bitmap file %d\n", err);
1164         }
1165         err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
1166         if (err != 0) {
1167                 printf("hfs: failed to pin extended attr file %d\n", err);
1168         }
1169
1170         hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1, vfs_context_kernel());
1171         hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1, vfs_context_kernel());
1172
1173         if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
1174                 // and hey, if we've got a journal, let's pin that too!
1175                 hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize), vfs_context_kernel());
1176         }
1177 }
1178
1179 /*
1180  * ReleaseMetaFileVNode
1181  *
1182  * vp   L - -
1183  */
1184 static void ReleaseMetaFileVNode(struct vnode *vp)
1185 {
1186         struct filefork *fp;
1187
1188         if (vp && (fp = VTOF(vp))) {
1189                 if (fp->fcbBTCBPtr != NULL) {
1190                         (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1191                         (void) BTClosePath(fp);
1192                         hfs_unlock(VTOC(vp));
1193                 }
1194
1195                 /* release the node even if BTClosePath fails */
1196                 vnode_recycle(vp);
1197                 vnode_put(vp);
1198         }
1199 }
1200
1201
1202 /*************************************************************
1203 *
1204 * Unmounts a hfs volume.
1205 *       At this point vflush() has been called (to dump all non-metadata files)
1206 *
1207 *************************************************************/
1208
1209 int
1210 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
1211 {
1212         /* Get rid of our attribute data vnode (if any).  This is done
1213          * after the vflush() during mount, so we don't need to worry
1214          * about any locks.
1215          */
1216         if (hfsmp->hfs_attrdata_vp) {
1217                 ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
1218                 hfsmp->hfs_attrdata_vp = NULLVP;
1219         }
1220
1221         if (hfsmp->hfs_startup_vp) {
1222                 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
1223                 hfsmp->hfs_startup_cp = NULL;
1224                 hfsmp->hfs_startup_vp = NULL;
1225         }
1226
1227         if (hfsmp->hfs_attribute_vp) {
1228                 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
1229                 hfsmp->hfs_attribute_cp = NULL;
1230                 hfsmp->hfs_attribute_vp = NULL;
1231         }
1232
1233         if (hfsmp->hfs_catalog_vp) {
1234                 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
1235                 hfsmp->hfs_catalog_cp = NULL;
1236                 hfsmp->hfs_catalog_vp = NULL;
1237         }
1238
1239         if (hfsmp->hfs_extents_vp) {
1240                 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
1241                 hfsmp->hfs_extents_cp = NULL;
1242                 hfsmp->hfs_extents_vp = NULL;
1243         }
1244
1245         if (hfsmp->hfs_allocation_vp) {
1246                 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
1247                 hfsmp->hfs_allocation_cp = NULL;
1248                 hfsmp->hfs_allocation_vp = NULL;
1249         }
1250
1251         return (0);
1252 }
1253
1254
1255 /*
1256  * Test if fork has overflow extents.
1257  *
1258  * Returns:
1259  *      non-zero - overflow extents exist
1260  *      zero     - overflow extents do not exist
1261  */
1262 __private_extern__
1263 bool overflow_extents(struct filefork *fp)
1264 {
1265         u_int32_t blocks;
1266
1267         //
1268         // If the vnode pointer is NULL then we're being called
1269         // from hfs_remove_orphans() with a faked-up filefork
1270         // and therefore it has to be an HFS+ volume.  Otherwise
1271         // we check through the volume header to see what type
1272         // of volume we're on.
1273         //
1274
1275 #if CONFIG_HFS_STD
1276         if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
1277                 if (fp->ff_extents[2].blockCount == 0)
1278                         return false;
1279
1280                 blocks = fp->ff_extents[0].blockCount +
1281                         fp->ff_extents[1].blockCount +
1282                         fp->ff_extents[2].blockCount;
1283
1284                 return fp->ff_blocks > blocks;
1285         }
1286 #endif
1287
1288         if (fp->ff_extents[7].blockCount == 0)
1289                 return false;
1290
1291         blocks = fp->ff_extents[0].blockCount +
1292                 fp->ff_extents[1].blockCount +
1293                 fp->ff_extents[2].blockCount +
1294                 fp->ff_extents[3].blockCount +
1295                 fp->ff_extents[4].blockCount +
1296                 fp->ff_extents[5].blockCount +
1297                 fp->ff_extents[6].blockCount +
1298                 fp->ff_extents[7].blockCount;
1299
1300         return fp->ff_blocks > blocks;
1301 }
1302
1303 static __attribute__((pure))
1304 boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
1305 {
1306         return (hfsmp->hfs_freeze_state == HFS_FROZEN
1307                         || (hfsmp->hfs_freeze_state == HFS_FREEZING
1308                                 && current_thread() != hfsmp->hfs_freezing_thread));
1309 }
1310
1311 /*
1312  * Lock the HFS global journal lock
1313  */
1314 int
1315 hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
1316 {
1317         thread_t thread = current_thread();
1318
1319         if (hfsmp->hfs_global_lockowner == thread) {
1320                 panic ("hfs_lock_global: locking against myself!");
1321         }
1322
1323         /*
1324          * This check isn't really necessary but this stops us taking
1325          * the mount lock in most cases.  The essential check is below.
1326          */
1327         if (hfs_is_frozen(hfsmp)) {
1328                 /*
1329                  * Unfortunately, there is no easy way of getting a notification
1330                  * for when a process is exiting and it's possible for the exiting
1331                  * process to get blocked somewhere else.  To catch this, we
1332                  * periodically monitor the frozen process here and thaw if
1333                  * we spot that it's exiting.
1334                  */
1335 frozen:
1336                 hfs_lock_mount(hfsmp);
1337
1338                 struct timespec ts = { 0, 500 * NSEC_PER_MSEC };
1339
1340                 while (hfs_is_frozen(hfsmp)) {
1341                         if (hfsmp->hfs_freeze_state == HFS_FROZEN
1342                                 && proc_exiting(hfsmp->hfs_freezing_proc)) {
1343                                 hfs_thaw_locked(hfsmp);
1344                                 break;
1345                         }
1346
1347                         msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
1348                                PWAIT, "hfs_lock_global (frozen)", &ts);
1349                 }
1350                 hfs_unlock_mount(hfsmp);
1351         }
1352
1353         /* HFS_SHARED_LOCK */
1354         if (locktype == HFS_SHARED_LOCK) {
1355                 lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1356                 hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1357         }
1358         /* HFS_EXCLUSIVE_LOCK */
1359         else {
1360                 lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1361                 hfsmp->hfs_global_lockowner = thread;
1362         }
1363
1364         /*
1365          * We have to check if we're frozen again because of the time
1366          * between when we checked and when we took the global lock.
1367          */
1368         if (hfs_is_frozen(hfsmp)) {
1369                 hfs_unlock_global(hfsmp);
1370                 goto frozen;
1371         }
1372
1373         return 0;
1374 }
1375
1376
1377 /*
1378  * Unlock the HFS global journal lock
1379  */
1380 void
1381 hfs_unlock_global (struct hfsmount *hfsmp)
1382 {
1383         thread_t thread = current_thread();
1384
1385         /* HFS_LOCK_EXCLUSIVE */
1386         if (hfsmp->hfs_global_lockowner == thread) {
1387                 hfsmp->hfs_global_lockowner = NULL;
1388                 lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1389         }
1390         /* HFS_LOCK_SHARED */
1391         else {
1392                 lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1393         }
1394 }
1395
1396 /*
1397  * Lock the HFS mount lock
1398  *
1399  * Note: this is a mutex, not a rw lock!
1400  */
1401 inline
1402 void hfs_lock_mount (struct hfsmount *hfsmp) {
1403         lck_mtx_lock (&(hfsmp->hfs_mutex));
1404 }
1405
1406 /*
1407  * Unlock the HFS mount lock
1408  *
1409  * Note: this is a mutex, not a rw lock!
1410  */
1411 inline
1412 void hfs_unlock_mount (struct hfsmount *hfsmp) {
1413         lck_mtx_unlock (&(hfsmp->hfs_mutex));
1414 }
1415
1416 /*
1417  * Lock HFS system file(s).
1418  *
1419  * This function accepts a @flags parameter which indicates which
1420  * system file locks are required.  The value it returns should be
1421  * used in a subsequent call to hfs_systemfile_unlock.  The caller
1422  * should treat this value as opaque; it may or may not have a
1423  * relation to the @flags field that is passed in.  The *only*
1424  * guarantee that we make is that a value of zero means that no locks
1425  * were taken and that there is no need to call hfs_systemfile_unlock
1426  * (although it is harmless to do so).  Recursion is supported but
1427  * care must still be taken to ensure correct lock ordering.  Note
1428  * that requests for certain locks may cause other locks to also be
1429  * taken, including locks that are not possible to ask for via the
1430  * @flags parameter.
1431  */
1432 int
1433 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1434 {
1435         /*
1436          * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1437          */
1438         if (flags & SFL_CATALOG) {
1439                 if (hfsmp->hfs_catalog_cp
1440                         && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
1441 #ifdef HFS_CHECK_LOCK_ORDER
1442                         if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1443                                 panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1444                         }
1445                         if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1446                                 panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1447                         }
1448                         if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1449                                 panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1450                         }
1451 #endif /* HFS_CHECK_LOCK_ORDER */
1452
1453                         (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1454                         /*
1455                          * When the catalog file has overflow extents then
1456                          * also acquire the extents b-tree lock if its not
1457                          * already requested.
1458                          */
1459                         if (((flags & SFL_EXTENTS) == 0) &&
1460                             (hfsmp->hfs_catalog_vp != NULL) &&
1461                             (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1462                                 flags |= SFL_EXTENTS;
1463                         }
1464                 } else {
1465                         flags &= ~SFL_CATALOG;
1466                 }
1467         }
1468
1469         if (flags & SFL_ATTRIBUTE) {
1470                 if (hfsmp->hfs_attribute_cp
1471                         && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
1472 #ifdef HFS_CHECK_LOCK_ORDER
1473                         if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1474                                 panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1475                         }
1476                         if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1477                                 panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1478                         }
1479 #endif /* HFS_CHECK_LOCK_ORDER */
1480
1481                         (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1482                         /*
1483                          * When the attribute file has overflow extents then
1484                          * also acquire the extents b-tree lock if its not
1485                          * already requested.
1486                          */
1487                         if (((flags & SFL_EXTENTS) == 0) &&
1488                             (hfsmp->hfs_attribute_vp != NULL) &&
1489                             (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1490                                 flags |= SFL_EXTENTS;
1491                         }
1492                 } else {
1493                         flags &= ~SFL_ATTRIBUTE;
1494                 }
1495         }
1496
1497         if (flags & SFL_STARTUP) {
1498                 if (hfsmp->hfs_startup_cp
1499                         && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
1500 #ifdef HFS_CHECK_LOCK_ORDER
1501                         if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1502                                 panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1503                         }
1504 #endif /* HFS_CHECK_LOCK_ORDER */
1505
1506                         (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1507                         /*
1508                          * When the startup file has overflow extents then
1509                          * also acquire the extents b-tree lock if its not
1510                          * already requested.
1511                          */
1512                         if (((flags & SFL_EXTENTS) == 0) &&
1513                             (hfsmp->hfs_startup_vp != NULL) &&
1514                             (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1515                                 flags |= SFL_EXTENTS;
1516                         }
1517                 } else {
1518                         flags &= ~SFL_STARTUP;
1519                 }
1520         }
1521
1522         /*
1523          * To prevent locks being taken in the wrong order, the extent lock
1524          * gets a bitmap lock as well.
1525          */
1526         if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1527                 if (hfsmp->hfs_allocation_cp) {
1528                         (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1529                         /*
1530                          * The bitmap lock is also grabbed when only extent lock
1531                          * was requested. Set the bitmap lock bit in the lock
1532                          * flags which callers will use during unlock.
1533                          */
1534                         flags |= SFL_BITMAP;
1535                 } else {
1536                         flags &= ~SFL_BITMAP;
1537                 }
1538         }
1539
1540         if (flags & SFL_EXTENTS) {
1541                 /*
1542                  * Since the extents btree lock is recursive we always
1543                  * need exclusive access.
1544                  */
1545                 if (hfsmp->hfs_extents_cp) {
1546                         (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1547
1548                         if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) {
1549                                 /*
1550                                  * because we may need this lock on the pageout path (if a swapfile allocation
1551                                  * spills into the extents overflow tree), we will grant the holder of this
1552                                  * lock the privilege of dipping into the reserve free pool in order to prevent
1553                                  * a deadlock from occurring if we need those pageouts to complete before we
1554                                  * will make any new pages available on the free list... the deadlock can occur
1555                                  * if this thread needs to allocate memory while this lock is held
1556                                  */
1557                                 if (set_vm_privilege(TRUE) == FALSE) {
1558                                         /*
1559                                          * indicate that we need to drop vm_privilege
1560                                          * when we unlock
1561                                          */
1562                                         flags |= SFL_VM_PRIV;
1563                                 }
1564                         }
1565                 } else {
1566                         flags &= ~SFL_EXTENTS;
1567                 }
1568         }
1569
1570         return (flags);
1571 }
1572
1573 /*
1574  * unlock HFS system file(s).
1575  */
1576 void
1577 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1578 {
1579         if (!flags)
1580                 return;
1581
1582         struct timeval tv;
1583         u_int32_t lastfsync;
1584         int numOfLockedBuffs;
1585
1586         if (hfsmp->jnl == NULL) {
1587                 microuptime(&tv);
1588                 lastfsync = tv.tv_sec;
1589         }
1590         if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1591                 hfs_unlock(hfsmp->hfs_startup_cp);
1592         }
1593         if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1594                 if (hfsmp->jnl == NULL) {
1595                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1596                         numOfLockedBuffs = count_lock_queue();
1597                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1598                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1599                               kMaxSecsForFsync))) {
1600                                 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1601                         }
1602                 }
1603                 hfs_unlock(hfsmp->hfs_attribute_cp);
1604         }
1605         if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1606                 if (hfsmp->jnl == NULL) {
1607                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1608                         numOfLockedBuffs = count_lock_queue();
1609                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1610                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1611                               kMaxSecsForFsync))) {
1612                                 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1613                         }
1614                 }
1615                 hfs_unlock(hfsmp->hfs_catalog_cp);
1616         }
1617         if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1618                 hfs_unlock(hfsmp->hfs_allocation_cp);
1619         }
1620         if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1621                 if (hfsmp->jnl == NULL) {
1622                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1623                         numOfLockedBuffs = count_lock_queue();
1624                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1625                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1626                               kMaxSecsForFsync))) {
1627                                 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1628                         }
1629                 }
1630                 hfs_unlock(hfsmp->hfs_extents_cp);
1631
1632                 if (flags & SFL_VM_PRIV) {
1633                         /*
1634                          * revoke the vm_privilege we granted this thread
1635                          * now that we have unlocked the overflow extents
1636                          */
1637                         set_vm_privilege(FALSE);
1638                 }
1639         }
1640 }
1641
1642
/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context
 * This is to be used for debugging purposes only!!
 *
 * Panics when one of the HFS system files (extents, catalog,
 * allocation, startup, attributes) is not owned by the calling thread
 * and the caller did not declare the access shareable.  The extents
 * btree and the allocation bitmap are never considered shareable.
 * Other file IDs are ignored.
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
	int locked;

	/* The extents btree and allocation bitmap are always exclusive. */
	if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
	    VTOC(vp)->c_fileid == kHFSAllocationFileID) {
		shareable = 0;
	}

	locked = VTOC(vp)->c_lockowner == current_thread();

	if (!locked && !shareable) {
		/*
		 * The vnode is reported with %p: casting the pointer to
		 * u_int would drop its upper half on 64-bit kernels.
		 */
		switch (VTOC(vp)->c_fileid) {
		case kHFSExtentsFileID:
			panic("hfs: extents btree not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSCatalogFileID:
			panic("hfs: catalog btree not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSAllocationFileID:
			/* The allocation file can hide behind the journal lock. */
			if (VTOHFS(vp)->jnl == NULL)
				panic("hfs: allocation file not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSStartupFileID:
			panic("hfs: startup file not locked! v: %p\n #\n", (void *)vp);
			break;	/* was missing: fell through to the attributes case */
		case kHFSAttributesFileID:
			panic("hfs: attributes btree not locked! v: %p\n #\n", (void *)vp);
			break;
		default:
			/* Not a system file this check knows about. */
			break;
		}
	}
}
#endif
1684
1685
1686 /*
1687  * There are three ways to qualify for ownership rights on an object:
1688  *
1689  * 1. (a) Your UID matches the cnode's UID.
1690  *    (b) The object in question is owned by "unknown"
1691  * 2. (a) Permissions on the filesystem are being ignored and
1692  *        your UID matches the replacement UID.
1693  *    (b) Permissions on the filesystem are being ignored and
1694  *        the replacement UID is "unknown".
1695  * 3. You are root.
1696  *
1697  */
1698 int
1699 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1700                 __unused struct proc *p, int invokesuperuserstatus)
1701 {
1702         if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1703             (cnode_uid == UNKNOWNUID) ||                                                                          /* [1b] */
1704             ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1705               ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1706                 (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1707             (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1708                 return (0);
1709         } else {
1710                 return (EPERM);
1711         }
1712 }
1713
1714
/*
 * Compute the optimal (largest) logical block size for a volume.
 *
 * The result divides allocationBlockSize evenly, is a multiple of
 * baseMultiple and is no larger than blockSizeLimit -- except that an
 * allocation block size that is a multiple of PAGE_SIZE always yields
 * PAGE_SIZE, whatever the limit.
 *
 * Returns 512 when no such size exists: baseMultiple is zero,
 * allocationBlockSize is not a multiple of baseMultiple, or every
 * candidate exceeds blockSizeLimit.
 */
u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
                           u_int32_t blockSizeLimit,
                           u_int32_t baseMultiple) {
    u_int32_t blockCount;
    u_int32_t maxBlockCount;
    u_int32_t trialBlockSize;

    /* A zero base would make every modulo/division below trap. */
    if (baseMultiple == 0) {
        return 512;
    }

    if (allocationBlockSize % baseMultiple != 0) {
        /*
           Whoops: the allocation blocks aren't even multiples of the specified base:
           no amount of dividing them into even parts will be a multiple, either then!
        */
        return 512;             /* Hope for the best */
    }

    /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
       from being handled as two 6K logical blocks instead of 3 4K logical blocks.
       Even though the former (the result of the loop below) is the larger allocation
       block size, the latter is more efficient: */
    if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;

    /* No clear winner exists: pick the largest even fraction <= blockSizeLimit.
       Candidates are blockCount * baseMultiple, so each one is a multiple of
       the base by construction.  Start at the largest count that still fits
       under the limit; anything bigger could never be accepted. */
    blockCount = allocationBlockSize / baseMultiple;
    maxBlockCount = blockSizeLimit / baseMultiple;
    if (blockCount > maxBlockCount) {
        blockCount = maxBlockCount;
    }

    for (; blockCount > 0; --blockCount) {
        trialBlockSize = blockCount * baseMultiple;
        if (allocationBlockSize % trialBlockSize == 0) {        /* An even fraction? */
            return trialBlockSize;
        }
    }

    /* Only reached when blockSizeLimit is below baseMultiple (or the
       allocation block size is smaller than one base unit). */
    return 512;
}
1756
1757
1758 u_int32_t
1759 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1760                         struct cat_attr *fattr, struct cat_fork *forkinfo)
1761 {
1762         struct hfsmount * hfsmp;
1763         struct cat_desc jdesc;
1764         int lockflags;
1765         int error;
1766
1767         if (vcb->vcbSigWord != kHFSPlusSigWord)
1768                 return (0);
1769
1770         hfsmp = VCBTOHFS(vcb);
1771
1772         memset(&jdesc, 0, sizeof(struct cat_desc));
1773         jdesc.cd_parentcnid = kRootDirID;
1774         jdesc.cd_nameptr = (const u_int8_t *)name;
1775         jdesc.cd_namelen = strlen(name);
1776
1777         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1778         error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1779         hfs_systemfile_unlock(hfsmp, lockflags);
1780
1781         if (error == 0) {
1782                 return (fattr->ca_fileid);
1783         } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1784                 return (0);
1785         }
1786
1787         return (0);     /* XXX what callers expect on an error */
1788 }
1789
1790
1791 /*
1792  * On HFS Plus Volumes, there can be orphaned files or directories
1793  * These are files or directories that were unlinked while busy.
1794  * If the volume was not cleanly unmounted then some of these may
1795  * have persisted and need to be removed.
1796  */
1797 void
1798 hfs_remove_orphans(struct hfsmount * hfsmp)
1799 {
1800         struct BTreeIterator * iterator = NULL;
1801         struct FSBufferDescriptor btdata;
1802         struct HFSPlusCatalogFile filerec;
1803         struct HFSPlusCatalogKey * keyp;
1804         struct proc *p = current_proc();
1805         FCB *fcb;
1806         ExtendedVCB *vcb;
1807         char filename[32];
1808         char tempname[32];
1809         size_t namelen;
1810         cat_cookie_t cookie;
1811         int catlock = 0;
1812         int catreserve = 0;
1813         bool started_tr = false;
1814         int lockflags;
1815         int result;
1816         int orphaned_files = 0;
1817         int orphaned_dirs = 0;
1818
1819         bzero(&cookie, sizeof(cookie));
1820
1821         if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1822                 return;
1823
1824         vcb = HFSTOVCB(hfsmp);
1825         fcb = VTOF(hfsmp->hfs_catalog_vp);
1826
1827         btdata.bufferAddress = &filerec;
1828         btdata.itemSize = sizeof(filerec);
1829         btdata.itemCount = 1;
1830
1831         MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1832         bzero(iterator, sizeof(*iterator));
1833
1834         /* Build a key to "temp" */
1835         keyp = (HFSPlusCatalogKey*)&iterator->key;
1836         keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1837         keyp->nodeName.length = 4;  /* "temp" */
1838         keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1839         keyp->nodeName.unicode[0] = 't';
1840         keyp->nodeName.unicode[1] = 'e';
1841         keyp->nodeName.unicode[2] = 'm';
1842         keyp->nodeName.unicode[3] = 'p';
1843
1844         /*
1845          * Position the iterator just before the first real temp file/dir.
1846          */
1847         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1848         (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1849         hfs_systemfile_unlock(hfsmp, lockflags);
1850
1851         /* Visit all the temp files/dirs in the HFS+ private directory. */
1852         for (;;) {
1853                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1854                 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1855                 hfs_systemfile_unlock(hfsmp, lockflags);
1856                 if (result)
1857                         break;
1858                 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1859                         break;
1860
1861                 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1862                                       (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1863
1864                 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1865                                 HFS_DELETE_PREFIX, filerec.fileID);
1866
1867                 /*
1868                  * Delete all files (and directories) named "tempxxx",
1869                  * where xxx is the file's cnid in decimal.
1870                  *
1871                  */
1872                 if (bcmp(tempname, filename, namelen) != 0)
1873                         continue;
1874
1875                 struct filefork dfork;
1876                 struct filefork rfork;
1877                 struct cnode cnode;
1878                 int mode = 0;
1879
1880                 bzero(&dfork, sizeof(dfork));
1881                 bzero(&rfork, sizeof(rfork));
1882                 bzero(&cnode, sizeof(cnode));
1883
1884                 if (hfs_start_transaction(hfsmp) != 0) {
1885                         printf("hfs_remove_orphans: failed to start transaction\n");
1886                         goto exit;
1887                 }
1888                 started_tr = true;
1889
1890                 /*
1891                  * Reserve some space in the Catalog file.
1892                  */
1893                 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1894                         printf("hfs_remove_orphans: cat_preflight failed\n");
1895                         goto exit;
1896                 }
1897                 catreserve = 1;
1898
1899                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1900                 catlock = 1;
1901
1902                 /* Build a fake cnode */
1903                 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1904                                                 &dfork.ff_data, &rfork.ff_data);
1905                 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1906                 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1907                 cnode.c_desc.cd_namelen = namelen;
1908                 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1909                 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1910
1911                 /* Position iterator at previous entry */
1912                 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1913                                                         NULL, NULL) != 0) {
1914                         break;
1915                 }
1916
1917                 /* Truncate the file to zero (both forks) */
1918                 if (dfork.ff_blocks > 0) {
1919                         u_int64_t fsize;
1920
1921                         dfork.ff_cp = &cnode;
1922                         cnode.c_datafork = &dfork;
1923                         cnode.c_rsrcfork = NULL;
1924                         fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1925                         while (fsize > 0) {
1926                                 if (fsize > HFS_BIGFILE_SIZE) {
1927                                         fsize -= HFS_BIGFILE_SIZE;
1928                                 } else {
1929                                         fsize = 0;
1930                                 }
1931
1932                                 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1933                                                                   cnode.c_attr.ca_fileid, false) != 0) {
1934                                         printf("hfs: error truncating data fork!\n");
1935                                         break;
1936                                 }
1937
1938                                 //
1939                                 // if we're iteratively truncating this file down,
1940                                 // then end the transaction and start a new one so
1941                                 // that no one transaction gets too big.
1942                                 //
1943                                 if (fsize > 0) {
1944                                         /* Drop system file locks before starting
1945                                          * another transaction to preserve lock order.
1946                                          */
1947                                         hfs_systemfile_unlock(hfsmp, lockflags);
1948                                         catlock = 0;
1949                                         hfs_end_transaction(hfsmp);
1950
1951                                         if (hfs_start_transaction(hfsmp) != 0) {
1952                                                 started_tr = false;
1953                                                 goto exit;
1954                                         }
1955                                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1956                                         catlock = 1;
1957                                 }
1958                         }
1959                 }
1960
1961                 if (rfork.ff_blocks > 0) {
1962                         rfork.ff_cp = &cnode;
1963                         cnode.c_datafork = NULL;
1964                         cnode.c_rsrcfork = &rfork;
1965                         if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1966                                 printf("hfs: error truncating rsrc fork!\n");
1967                                 break;
1968                         }
1969                 }
1970
1971                 // Deal with extended attributes
1972                 if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
1973                         // hfs_removeallattr uses its own transactions
1974                         hfs_systemfile_unlock(hfsmp, lockflags);
1975                         catlock = false;
1976                         hfs_end_transaction(hfsmp);
1977
1978                         hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);
1979
1980                         if (!started_tr) {
1981                                 if (hfs_start_transaction(hfsmp) != 0) {
1982                                         printf("hfs_remove_orphans: failed to start transaction\n");
1983                                         goto exit;
1984                                 }
1985                                 started_tr = true;
1986                         }
1987
1988                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1989                         catlock = 1;
1990                 }
1991
1992                 /* Remove the file or folder record from the Catalog */
1993                 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1994                         printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1995                         hfs_systemfile_unlock(hfsmp, lockflags);
1996                         catlock = 0;
1997                         hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1998                         break;
1999                 }
2000
2001                 mode = cnode.c_attr.ca_mode & S_IFMT;
2002
2003                 if (mode == S_IFDIR) {
2004                         orphaned_dirs++;
2005                 }
2006                 else {
2007                         orphaned_files++;
2008                 }
2009
2010                 /* Update parent and volume counts */
2011                 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
2012                 if (mode == S_IFDIR) {
2013                         DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
2014                 }
2015
2016                 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
2017                                                  &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
2018
2019                 /* Drop locks and end the transaction */
2020                 hfs_systemfile_unlock(hfsmp, lockflags);
2021                 cat_postflight(hfsmp, &cookie, p);
2022                 catlock = catreserve = 0;
2023
2024                 /*
2025                    Now that Catalog is unlocked, update the volume info, making
2026                    sure to differentiate between files and directories
2027                 */
2028                 if (mode == S_IFDIR) {
2029                         hfs_volupdate(hfsmp, VOL_RMDIR, 0);
2030                 }
2031                 else{
2032                         hfs_volupdate(hfsmp, VOL_RMFILE, 0);
2033                 }
2034
2035                 hfs_end_transaction(hfsmp);
2036                 started_tr = false;
2037         } /* end for */
2038
2039 exit:
2040
2041         if (orphaned_files > 0 || orphaned_dirs > 0)
2042                 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
2043         if (catlock) {
2044                 hfs_systemfile_unlock(hfsmp, lockflags);
2045         }
2046         if (catreserve) {
2047                 cat_postflight(hfsmp, &cookie, p);
2048         }
2049         if (started_tr) {
2050                 hfs_end_transaction(hfsmp);
2051         }
2052
2053         FREE(iterator, M_TEMP);
2054         hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
2055 }
2056
2057
2058 /*
2059  * This will return the correct logical block size for a given vnode.
2060  * For most files, it is the allocation block size, for meta data like
2061  * BTrees, this is kept as part of the BTree private nodeSize
2062  */
2063 u_int32_t
2064 GetLogicalBlockSize(struct vnode *vp)
2065 {
2066 u_int32_t logBlockSize;
2067
2068         DBG_ASSERT(vp != NULL);
2069
2070         /* start with default */
2071         logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
2072
2073         if (vnode_issystem(vp)) {
2074                 if (VTOF(vp)->fcbBTCBPtr != NULL) {
2075                         BTreeInfoRec                    bTreeInfo;
2076
2077                         /*
2078                          * We do not lock the BTrees, because if we are getting block..then the tree
2079                          * should be locked in the first place.
2080                          * We just want the nodeSize wich will NEVER change..so even if the world
2081                          * is changing..the nodeSize should remain the same. Which argues why lock
2082                          * it in the first place??
2083                          */
2084
2085                         (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
2086
2087                         logBlockSize = bTreeInfo.nodeSize;
2088
2089                 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2090                                 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2091                 }
2092         }
2093
2094         DBG_ASSERT(logBlockSize > 0);
2095
2096         return logBlockSize;
2097 }
2098
2099 #if HFS_SPARSE_DEV
2100 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
2101 {
2102         struct vfsstatfs *vfsp;  /* 272 bytes */
2103         uint64_t vfreeblks;
2104         struct timeval now;
2105
2106         hfs_lock_mount(hfsmp);
2107
2108         vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp;
2109         if (!backing_vp) {
2110                 hfs_unlock_mount(hfsmp);
2111                 return false;
2112         }
2113
2114         // usecount is not enough; we need iocount
2115         if (vnode_get(backing_vp)) {
2116                 hfs_unlock_mount(hfsmp);
2117                 *pfree_blks = 0;
2118                 return true;
2119         }
2120
2121         uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2122         uint32_t bandblks       = hfsmp->hfs_sparsebandblks;
2123         uint64_t maxblks        = hfsmp->hfs_backingfs_maxblocks;
2124
2125         hfs_unlock_mount(hfsmp);
2126
2127         mount_t backingfs_mp = vnode_mount(backing_vp);
2128
2129         microtime(&now);
2130         if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
2131                 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
2132                 hfsmp->hfs_last_backingstatfs = now.tv_sec;
2133         }
2134
2135         if (!(vfsp = vfs_statfs(backingfs_mp))) {
2136                 vnode_put(backing_vp);
2137                 return false;
2138         }
2139
2140         vfreeblks = vfsp->f_bavail;
2141         /* Normalize block count if needed. */
2142         if (vfsp->f_bsize != hfsmp->blockSize)
2143                 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
2144         if (vfreeblks > bandblks)
2145                 vfreeblks -= bandblks;
2146         else
2147                 vfreeblks = 0;
2148
2149         /*
2150          * Take into account any delayed allocations.  It is not
2151          * certain what the original reason for the "2 *" is.  Most
2152          * likely it is to allow for additional requirements in the
2153          * host file system and metadata required by disk images.  The
2154          * number of loaned blocks is likely to be small and we will
2155          * stop using them as we get close to the limit.
2156          */
2157         loanedblks = 2 * loanedblks;
2158         if (vfreeblks > loanedblks)
2159                 vfreeblks -= loanedblks;
2160         else
2161                 vfreeblks = 0;
2162
2163         if (maxblks)
2164                 vfreeblks = MIN(vfreeblks, maxblks);
2165
2166         vnode_put(backing_vp);
2167
2168         *pfree_blks = vfreeblks;
2169
2170         return true;
2171 }
2172 #endif
2173
2174 u_int32_t
2175 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2176 {
2177         u_int32_t freeblks;
2178         u_int32_t rsrvblks;
2179         u_int32_t loanblks;
2180
2181         /*
2182          * We don't bother taking the mount lock
2183          * to look at these values since the values
2184          * themselves are each updated atomically
2185          * on aligned addresses.
2186          */
2187         freeblks = hfsmp->freeBlocks;
2188         rsrvblks = hfsmp->reserveBlocks;
2189         loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2190         if (wantreserve) {
2191                 if (freeblks > rsrvblks)
2192                         freeblks -= rsrvblks;
2193                 else
2194                         freeblks = 0;
2195         }
2196         if (freeblks > loanblks)
2197                 freeblks -= loanblks;
2198         else
2199                 freeblks = 0;
2200
2201 #if HFS_SPARSE_DEV
2202         /*
2203          * When the underlying device is sparse, check the
2204          * available space on the backing store volume.
2205          */
2206         uint64_t vfreeblks;
2207         if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2208                 freeblks = MIN(freeblks, vfreeblks);
2209 #endif /* HFS_SPARSE_DEV */
2210
2211         return (freeblks);
2212 }
2213
2214 /*
2215  * Map HFS Common errors (negative) to BSD error codes (positive).
2216  * Positive errors (ie BSD errors) are passed through unchanged.
2217  */
2218 short MacToVFSError(OSErr err)
2219 {
2220         if (err >= 0)
2221                 return err;
2222
2223         /* BSD/VFS internal errnos */
2224         switch (err) {
2225                 case ERESERVEDNAME: /* -8 */
2226                         return err;
2227         }
2228
2229         switch (err) {
2230         case dskFulErr:                 /*    -34 */
2231         case btNoSpaceAvail:            /* -32733 */
2232                 return ENOSPC;
2233         case fxOvFlErr:                 /* -32750 */
2234                 return EOVERFLOW;
2235
2236         case btBadNode:                 /* -32731 */
2237                 return EIO;
2238
2239         case memFullErr:                /*  -108 */
2240                 return ENOMEM;          /*   +12 */
2241
2242         case cmExists:                  /* -32718 */
2243         case btExists:                  /* -32734 */
2244                 return EEXIST;          /*    +17 */
2245
2246         case cmNotFound:                /* -32719 */
2247         case btNotFound:                /* -32735 */
2248                 return ENOENT;          /*     28 */
2249
2250         case cmNotEmpty:                /* -32717 */
2251                 return ENOTEMPTY;       /*     66 */
2252
2253         case cmFThdDirErr:              /* -32714 */
2254                 return EISDIR;          /*     21 */
2255
2256         case fxRangeErr:                /* -32751 */
2257                 return ERANGE;
2258
2259         case bdNamErr:                  /*   -37 */
2260                 return ENAMETOOLONG;    /*    63 */
2261
2262         case paramErr:                  /*   -50 */
2263         case fileBoundsErr:             /* -1309 */
2264                 return EINVAL;          /*   +22 */
2265
2266         case fsBTBadNodeSize:
2267                 return ENXIO;
2268
2269         default:
2270                 return EIO;             /*   +5 */
2271         }
2272 }
2273
2274
2275 /*
2276  * Find the current thread's directory hint for a given index.
2277  *
2278  * Requires an exclusive lock on directory cnode.
2279  *
2280  * Use detach if the cnode lock must be dropped while the hint is still active.
2281  */
2282 __private_extern__
2283 directoryhint_t *
2284 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2285 {
2286         struct timeval tv;
2287         directoryhint_t *hint;
2288         boolean_t need_remove, need_init;
2289         const u_int8_t * name;
2290
2291         microuptime(&tv);
2292
2293         /*
2294          *  Look for an existing hint first.  If not found, create a new one (when
2295          *  the list is not full) or recycle the oldest hint.  Since new hints are
2296          *  always added to the head of the list, the last hint is always the
2297          *  oldest.
2298          */
2299         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2300                 if (hint->dh_index == index)
2301                         break;
2302         }
2303         if (hint != NULL) { /* found an existing hint */
2304                 need_init = false;
2305                 need_remove = true;
2306         } else { /* cannot find an existing hint */
2307                 need_init = true;
2308                 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2309                         /* Create a default directory hint */
2310                         MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
2311                         ++dcp->c_dirhintcnt;
2312                         need_remove = false;
2313                 } else {                                /* recycle the last (i.e., the oldest) hint */
2314                         hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2315                         if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2316                             (name = hint->dh_desc.cd_nameptr)) {
2317                                 hint->dh_desc.cd_nameptr = NULL;
2318                                 hint->dh_desc.cd_namelen = 0;
2319                                 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2320                                 vfs_removename((const char *)name);
2321                         }
2322                         need_remove = true;
2323                 }
2324         }
2325
2326         if (need_remove)
2327                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2328
2329         if (detach)
2330                 --dcp->c_dirhintcnt;
2331         else
2332                 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2333
2334         if (need_init) {
2335                 hint->dh_index = index;
2336                 hint->dh_desc.cd_flags = 0;
2337                 hint->dh_desc.cd_encoding = 0;
2338                 hint->dh_desc.cd_namelen = 0;
2339                 hint->dh_desc.cd_nameptr = NULL;
2340                 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2341                 hint->dh_desc.cd_hint = dcp->c_childhint;
2342                 hint->dh_desc.cd_cnid = 0;
2343         }
2344         hint->dh_time = tv.tv_sec;
2345         return (hint);
2346 }
2347
2348 /*
2349  * Release a single directory hint.
2350  *
2351  * Requires an exclusive lock on directory cnode.
2352  */
2353 __private_extern__
2354 void
2355 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2356 {
2357         const u_int8_t * name;
2358         directoryhint_t *hint;
2359
2360         /* Check if item is on list (could be detached) */
2361         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2362                 if (hint == relhint) {
2363                         TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2364                         --dcp->c_dirhintcnt;
2365                         break;
2366                 }
2367         }
2368         name = relhint->dh_desc.cd_nameptr;
2369         if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2370                 relhint->dh_desc.cd_nameptr = NULL;
2371                 relhint->dh_desc.cd_namelen = 0;
2372                 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2373                 vfs_removename((const char *)name);
2374         }
2375         FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2376 }
2377
2378 /*
2379  * Release directory hints for given directory
2380  *
2381  * Requires an exclusive lock on directory cnode.
2382  */
2383 __private_extern__
2384 void
2385 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2386 {
2387         struct timeval tv;
2388         directoryhint_t *hint, *prev;
2389         const u_int8_t * name;
2390
2391         if (stale_hints_only)
2392                 microuptime(&tv);
2393
2394         /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2395         for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2396                 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2397                         break;  /* stop here if this entry is too new */
2398                 name = hint->dh_desc.cd_nameptr;
2399                 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2400                         hint->dh_desc.cd_nameptr = NULL;
2401                         hint->dh_desc.cd_namelen = 0;
2402                         hint->dh_desc.cd_flags &= ~CD_HASBUF;
2403                         vfs_removename((const char *)name);
2404                 }
2405                 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2406                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2407                 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2408                 --dcp->c_dirhintcnt;
2409         }
2410 }
2411
2412 /*
2413  * Insert a detached directory hint back into the list of dirhints.
2414  *
2415  * Requires an exclusive lock on directory cnode.
2416  */
2417 __private_extern__
2418 void
2419 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2420 {
2421         directoryhint_t *test;
2422
2423         TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2424                 if (test == hint)
2425                         panic("hfs_insertdirhint: hint %p already on list!", hint);
2426         }
2427
2428         TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2429         ++dcp->c_dirhintcnt;
2430 }
2431
2432 /*
2433  * Perform a case-insensitive compare of two UTF-8 filenames.
2434  *
2435  * Returns 0 if the strings match.
2436  */
2437 __private_extern__
2438 int
2439 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2440 {
2441         u_int16_t *ustr1, *ustr2;
2442         size_t ulen1, ulen2;
2443         size_t maxbytes;
2444         int cmp = -1;
2445
2446         if (len1 != len2)
2447                 return (cmp);
2448
2449         maxbytes = kHFSPlusMaxFileNameChars << 1;
2450         MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2451         ustr2 = ustr1 + (maxbytes >> 1);
2452
2453         if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2454                 goto out;
2455         if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2456                 goto out;
2457
2458         cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2459 out:
2460         FREE(ustr1, M_TEMP);
2461         return (cmp);
2462 }
2463
2464
/*
 * State shared between open_journal_dev() and journal_open_cb() while
 * candidate external journal devices are being examined.
 */
typedef struct jopen_cb_info {
	off_t         jsize;         /* journal size handed to journal_is_clean() */
	char         *desired_uuid;  /* uuid to match; an empty string accepts any device
	                              * and is overwritten with the uuid that was chosen */
	struct vnode *jvp;           /* vnode of the opened journal device, or NULL */
	size_t        blksize;       /* block size handed to journal_is_clean() */
	int           need_clean;    /* non-zero: check the journal with journal_is_clean()
	                              * when no specific uuid was requested */
	int           need_init;     /* set to 1 when journal_is_clean() returns EINVAL */
} jopen_cb_info;
2473
2474 static int
2475 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2476 {
2477         struct nameidata nd;
2478         jopen_cb_info *ji = (jopen_cb_info *)arg;
2479         char bsd_name[256];
2480         int error;
2481
2482         strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2483         strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2484
2485         if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2486                 return 1;   // keep iterating
2487         }
2488
2489         // if we're here, either the desired uuid matched or there was no
2490         // desired uuid so let's try to open the device for writing and
2491         // see if it works.  if it does, we'll use it.
2492
2493         NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2494         if ((error = namei(&nd))) {
2495                 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2496                 return 1;   // keep iterating
2497         }
2498
2499         ji->jvp = nd.ni_vp;
2500         nameidone(&nd);
2501
2502         if (ji->jvp == NULL) {
2503                 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2504         } else {
2505                 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2506                 if (error == 0) {
2507                         // if the journal is dirty and we didn't specify a desired
2508                         // journal device uuid, then do not use the journal.  but
2509                         // if the journal is just invalid (e.g. it hasn't been
2510                         // initialized) then just set the need_init flag.
2511                         if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2512                                 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2513                                 if (error == EBUSY) {
2514                                         VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2515                                         vnode_put(ji->jvp);
2516                                         ji->jvp = NULL;
2517                                         return 1;    // keep iterating
2518                                 } else if (error == EINVAL) {
2519                                         ji->need_init = 1;
2520                                 }
2521                         }
2522
2523                         if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2524                                 strlcpy(ji->desired_uuid, uuid_str, 128);
2525                         }
2526                         vnode_setmountedon(ji->jvp);
2527                         return 0;   // stop iterating
2528                 } else {
2529                         vnode_put(ji->jvp);
2530                         ji->jvp = NULL;
2531                 }
2532         }
2533
2534         return 1;   // keep iterating
2535 }
2536
2537 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2538 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2539
2540
2541 static vnode_t
2542 open_journal_dev(const char *vol_device,
2543                  int need_clean,
2544                  char *uuid_str,
2545                  char *machine_serial_num,
2546                  off_t jsize,
2547                  size_t blksize,
2548                  int *need_init)
2549 {
2550     int retry_counter=0;
2551     jopen_cb_info ji;
2552
2553     ji.jsize        = jsize;
2554     ji.desired_uuid = uuid_str;
2555     ji.jvp          = NULL;
2556     ji.blksize      = blksize;
2557     ji.need_clean   = need_clean;
2558     ji.need_init    = 0;
2559
2560 //    if (uuid_str[0] == '\0') {
2561 //          printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2562 //    } else {
2563 //          printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2564 //    }
2565     while (ji.jvp == NULL && retry_counter++ < 4) {
2566             if (retry_counter > 1) {
2567                     if (uuid_str[0]) {
2568                             printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2569                     } else {
2570                             printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2571                     }
2572                     delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2573             }
2574
2575             IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2576     }
2577
2578     if (ji.jvp == NULL) {
2579             printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2580                    vol_device, uuid_str, machine_serial_num);
2581     }
2582
2583     *need_init = ji.need_init;
2584
2585     return ji.jvp;
2586 }
2587
2588
2589 int
2590 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2591                                            void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2592                                            HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2593 {
2594         JournalInfoBlock *jibp;
2595         struct buf       *jinfo_bp, *bp;
2596         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2597         int               retval, write_jibp = 0;
2598         uint32_t                  blksize = hfsmp->hfs_logical_block_size;
2599         struct vnode     *devvp;
2600         struct hfs_mount_args *args = _args;
2601         u_int32_t         jib_flags;
2602         u_int64_t         jib_offset;
2603         u_int64_t         jib_size;
2604         const char *dev_name;
2605
2606         devvp = hfsmp->hfs_devvp;
2607         dev_name = vnode_getname_printable(devvp);
2608
2609         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2610                 arg_flags  = args->journal_flags;
2611                 arg_tbufsz = args->journal_tbuffer_size;
2612         }
2613
2614         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2615
2616         jinfo_bp = NULL;
2617         retval = (int)buf_meta_bread(devvp,
2618                                                 (daddr64_t)((embeddedOffset/blksize) +
2619                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2620                                                 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2621         if (retval) {
2622                 if (jinfo_bp) {
2623                         buf_brelse(jinfo_bp);
2624                 }
2625                 goto cleanup_dev_name;
2626         }
2627
2628         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2629         jib_flags  = SWAP_BE32(jibp->flags);
2630         jib_size   = SWAP_BE64(jibp->size);
2631
2632         if (jib_flags & kJIJournalInFSMask) {
2633                 hfsmp->jvp = hfsmp->hfs_devvp;
2634                 jib_offset = SWAP_BE64(jibp->offset);
2635         } else {
2636             int need_init=0;
2637
2638             // if the volume was unmounted cleanly then we'll pick any
2639             // available external journal partition
2640             //
2641             if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2642                     *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2643             }
2644
2645             hfsmp->jvp = open_journal_dev(dev_name,
2646                                           !(jib_flags & kJIJournalNeedInitMask),
2647                                           (char *)&jibp->ext_jnl_uuid[0],
2648                                           (char *)&jibp->machine_serial_num[0],
2649                                           jib_size,
2650                                           hfsmp->hfs_logical_block_size,
2651                                           &need_init);
2652             if (hfsmp->jvp == NULL) {
2653                     buf_brelse(jinfo_bp);
2654                     retval = EROFS;
2655                     goto cleanup_dev_name;
2656             } else {
2657                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2658                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2659                     }
2660             }
2661
2662             jib_offset = 0;
2663             write_jibp = 1;
2664             if (need_init) {
2665                     jib_flags |= kJIJournalNeedInitMask;
2666             }
2667         }
2668
2669         // save this off for the hack-y check in hfs_remove()
2670         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2671         hfsmp->jnl_size  = jib_size;
2672
2673         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2674             // if the file system is read-only, check if the journal is empty.
2675             // if it is, then we can allow the mount.  otherwise we have to
2676             // return failure.
2677             retval = journal_is_clean(hfsmp->jvp,
2678                                       jib_offset + embeddedOffset,
2679                                       jib_size,
2680                                       devvp,
2681                                       hfsmp->hfs_logical_block_size);
2682
2683             hfsmp->jnl = NULL;
2684
2685             buf_brelse(jinfo_bp);
2686
2687             if (retval) {
2688                     const char *name = vnode_getname_printable(devvp);
2689                     printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2690                     name);
2691                     vnode_putname_printable(name);
2692             }
2693
2694             goto cleanup_dev_name;
2695         }
2696
2697         if (jib_flags & kJIJournalNeedInitMask) {
2698                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2699                            jib_offset + embeddedOffset, jib_size);
2700                 hfsmp->jnl = journal_create(hfsmp->jvp,
2701                                                                         jib_offset + embeddedOffset,
2702                                                                         jib_size,
2703                                                                         devvp,
2704                                                                         blksize,
2705                                                                         arg_flags,
2706                                                                         arg_tbufsz,
2707                                                                         hfs_sync_metadata, hfsmp->hfs_mp,
2708                                                                         hfsmp->hfs_mp);
2709                 if (hfsmp->jnl)
2710                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2711
2712                 // no need to start a transaction here... if this were to fail
2713                 // we'd just re-init it on the next mount.
2714                 jib_flags &= ~kJIJournalNeedInitMask;
2715                 jibp->flags  = SWAP_BE32(jib_flags);
2716                 buf_bwrite(jinfo_bp);
2717                 jinfo_bp = NULL;
2718                 jibp     = NULL;
2719         } else {
2720                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2721                 //         jib_offset + embeddedOffset,
2722                 //         jib_size, SWAP_BE32(vhp->blockSize));
2723
2724                 hfsmp->jnl = journal_open(hfsmp->jvp,
2725                                                                   jib_offset + embeddedOffset,
2726                                                                   jib_size,
2727                                                                   devvp,
2728                                                                   blksize,
2729                                                                   arg_flags,
2730                                                                   arg_tbufsz,
2731                                                                   hfs_sync_metadata, hfsmp->hfs_mp,
2732                                                                   hfsmp->hfs_mp);
2733                 if (hfsmp->jnl)
2734                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2735
2736                 if (write_jibp) {
2737                         buf_bwrite(jinfo_bp);
2738                 } else {
2739                         buf_brelse(jinfo_bp);
2740                 }
2741                 jinfo_bp = NULL;
2742                 jibp     = NULL;
2743
2744                 if (hfsmp->jnl && mdbp) {
2745                         // reload the mdb because it could have changed
2746                         // if the journal had to be replayed.
2747                         if (mdb_offset == 0) {
2748                                 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2749                         }
2750                         bp = NULL;
2751                         retval = (int)buf_meta_bread(devvp,
2752                                         HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2753                                         hfsmp->hfs_physical_block_size, cred, &bp);
2754                         if (retval) {
2755                                 if (bp) {
2756                                         buf_brelse(bp);
2757                                 }
2758                                 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2759                                            retval);
2760                                 goto cleanup_dev_name;
2761                         }
2762                         bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2763                         buf_brelse(bp);
2764                         bp = NULL;
2765                 }
2766         }
2767
2768         // if we expected the journal to be there and we couldn't
2769         // create it or open it then we have to bail out.
2770         if (hfsmp->jnl == NULL) {
2771                 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2772                 retval = EINVAL;
2773                 goto cleanup_dev_name;
2774         }
2775
2776         retval = 0;
2777
2778 cleanup_dev_name:
2779         vnode_putname_printable(dev_name);
2780         return retval;
2781 }
2782
2783
2784 // hfs_late_journal_init: _args may be NULL or a struct hfs_mount_args (consulted only when HFSFSMNT_EXTENDED_ARGS is set).
2785 // This function will go and re-locate the .journal_info_block and
2786 // the .journal files in case they moved (which can happen if you
2787 // run Norton SpeedDisk).  If we fail to find either file we just
2788 // disable journaling for this volume and return.  We turn off the
2789 // journaling bit in the vcb and assume it will get written to disk
2790 // later (if it doesn't on the next mount we'd do the same thing
2791 // again which is harmless).  If we disable journaling we don't
2792 // return an error so that the volume is still mountable.
2793 //
2794 // If the info we find for the .journal_info_block and .journal files
2795 // isn't what we had stored, we re-set our cached info and proceed
2796 // with opening the journal normally.
2797 // Returns 0 on success or when journaling was merely disabled; EROFS if no external journal device could be opened; journal_is_clean()'s result on a read-only non-root mount; EINVAL if the journal could not be created or opened.
2798 static int
2799 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2800 {
2801         JournalInfoBlock *jibp;
2802         struct buf       *jinfo_bp;        // buffer holding the on-disk journal info block
2803         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2804         int               retval, write_jibp = 0, recreate_journal = 0;  // write_jibp: write the info block back; recreate_journal: create instead of open
2805         struct vnode     *devvp;
2806         struct cat_attr   jib_attr, jattr;
2807         struct cat_fork   jib_fork, jfork;
2808         ExtendedVCB      *vcb;
2809         u_int32_t            fid;
2810         struct hfs_mount_args *args = _args;  // may be NULL (checked below)
2811         u_int32_t         jib_flags;       // host-endian copies of the info block fields
2812         u_int64_t         jib_offset;
2813         u_int64_t         jib_size;
2814
2815         devvp = hfsmp->hfs_devvp;
2816         vcb = HFSTOVCB(hfsmp);
2817
2818         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2819                 if (args->journal_disable) {
2820                         return 0;  // the mount args ask for no journal: nothing to set up
2821                 }
2822
2823                 arg_flags  = args->journal_flags;
2824                 arg_tbufsz = args->journal_tbuffer_size;
2825         }
2826
2827         fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2828         if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2829                 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2830                            jib_fork.cf_extents[0].startBlock);
2831                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2832                 return 0;  // deliberately not an error: the volume mounts without a journal
2833         }
2834         hfsmp->hfs_jnlinfoblkid = fid;
2835
2836         // make sure the journal_info_block begins where we think it should.
2837         if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2838                 printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2839                            SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2840
2841                 vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2842                 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2843                 recreate_journal = 1;
2844         }
2845
2846
2847         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;  // logical blocks per allocation block
2848         jinfo_bp = NULL;
2849         retval = (int)buf_meta_bread(devvp,
2850                                                 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2851                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2852                                                 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);  // reads the info block at its (possibly just corrected) location
2853         if (retval) {
2854                 if (jinfo_bp) {
2855                         buf_brelse(jinfo_bp);
2856                 }
2857                 printf("hfs: can't read journal info block. disabling journaling.\n");
2858                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2859                 return 0;
2860         }
2861
2862         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);  // jibp points into jinfo_bp's data; valid until the buffer is released/written
2863         jib_flags  = SWAP_BE32(jibp->flags);
2864         jib_offset = SWAP_BE64(jibp->offset);
2865         jib_size   = SWAP_BE64(jibp->size);
2866
2867         fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2868         if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2869                 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2870                            jfork.cf_extents[0].startBlock);
2871                 buf_brelse(jinfo_bp);
2872                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2873                 return 0;
2874         }
2875         hfsmp->hfs_jnlfileid = fid;
2876
2877         // make sure the journal file begins where we think it should.
2878         if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2879                 printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2880                            (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2881
2882                 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2883                 write_jibp   = 1;
2884                 recreate_journal = 1;
2885         }
2886
2887         // check the size of the journal file.
2888         if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2889                 printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2890                            jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2891
2892                 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2893                 write_jibp = 1;
2894                 recreate_journal = 1;
2895         }
2896
2897         if (jib_flags & kJIJournalInFSMask) {
2898                 hfsmp->jvp = hfsmp->hfs_devvp;  // journal lives on the volume's own device
2899                 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;  // from here on jib_offset includes hfsPlusIOPosOffset
2900         } else {
2901             const char *dev_name;
2902             int need_init = 0;
2903
2904             dev_name = vnode_getname_printable(devvp);
2905
2906             // since the journal is empty, just use any available external journal
2907             *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2908
2909             // this fills in the uuid of the device we actually get
2910             hfsmp->jvp = open_journal_dev(dev_name,
2911                                           !(jib_flags & kJIJournalNeedInitMask),
2912                                           (char *)&jibp->ext_jnl_uuid[0],
2913                                           (char *)&jibp->machine_serial_num[0],
2914                                           jib_size,
2915                                           hfsmp->hfs_logical_block_size,
2916                                           &need_init);
2917             if (hfsmp->jvp == NULL) {
2918                     buf_brelse(jinfo_bp);
2919                     vnode_putname_printable(dev_name);
2920                     return EROFS;  // no external journal device could be opened
2921             } else {
2922                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2923                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2924                     }
2925             }
2926             jib_offset = 0;  // external journal: offset 0, always re-created, info block always rewritten
2927             recreate_journal = 1;
2928             write_jibp = 1;
2929             if (need_init) {
2930                     jib_flags |= kJIJournalNeedInitMask;
2931             }
2932             vnode_putname_printable(dev_name);
2933         }
2934
2935         // save this off for the hack-y check in hfs_remove()
2936         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);  // expressed in units of the volume header's blockSize
2937         hfsmp->jnl_size  = jib_size;
2938
2939         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2940             // if the file system is read-only, check if the journal is empty.
2941             // if it is, then we can allow the mount.  otherwise we have to
2942             // return failure.
2943             retval = journal_is_clean(hfsmp->jvp,
2944                                       jib_offset,
2945                                       jib_size,
2946                                       devvp,
2947                                       hfsmp->hfs_logical_block_size);
2948
2949             hfsmp->jnl = NULL;  // no journal object is kept on this read-only path
2950
2951             buf_brelse(jinfo_bp);  // released without writing, even if write_jibp was set above
2952
2953             if (retval) {
2954                     const char *name = vnode_getname_printable(devvp);
2955                     printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2956                     name);
2957                     vnode_putname_printable(name);
2958             }
2959
2960             return retval;  // non-zero is treated above as "journal is dirty"
2961         }
2962
2963         if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2964                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2965                            jib_offset, jib_size);
2966                 hfsmp->jnl = journal_create(hfsmp->jvp,
2967                                                                         jib_offset,
2968                                                                         jib_size,
2969                                                                         devvp,
2970                                                                         hfsmp->hfs_logical_block_size,
2971                                                                         arg_flags,
2972                                                                         arg_tbufsz,
2973                                                                         hfs_sync_metadata, hfsmp->hfs_mp,
2974                                                                         hfsmp->hfs_mp);
2975                 if (hfsmp->jnl)
2976                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2977
2978                 // no need to start a transaction here... if this were to fail
2979                 // we'd just re-init it on the next mount.
2980                 jib_flags &= ~kJIJournalNeedInitMask;
2981                 write_jibp   = 1;
2982
2983         } else {
2984                 //
2985                 // if we weren't the last person to mount this volume
2986                 // then we need to throw away the journal because it
2987                 // is likely that someone else mucked with the disk.
2988                 // if the journal is empty this is no big deal.  if the
2989                 // disk is dirty this prevents us from replaying the
2990                 // journal over top of changes that someone else made.
2991                 //
2992                 arg_flags |= JOURNAL_RESET;
2993
2994                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2995                 //         jib_offset,
2996                 //         jib_size, SWAP_BE32(vhp->blockSize));
2997
2998                 hfsmp->jnl = journal_open(hfsmp->jvp,
2999                                                                   jib_offset,
3000                                                                   jib_size,
3001                                                                   devvp,
3002                                                                   hfsmp->hfs_logical_block_size,
3003                                                                   arg_flags,
3004                                                                   arg_tbufsz,
3005                                                                   hfs_sync_metadata, hfsmp->hfs_mp,
3006                                                                   hfsmp->hfs_mp);
3007                 if (hfsmp->jnl)
3008                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3009         }
3010
3011
3012         if (write_jibp) {
3013                 jibp->flags  = SWAP_BE32(jib_flags);  // store the (possibly corrected) values back in big-endian form
3014                 jibp->offset = SWAP_BE64(jib_offset);
3015                 jibp->size   = SWAP_BE64(jib_size);
3016
3017                 buf_bwrite(jinfo_bp);
3018         } else {
3019                 buf_brelse(jinfo_bp);
3020         }
3021         jinfo_bp = NULL;  // the buffer (and jibp, which pointed into it) must not be used past this point
3022         jibp     = NULL;
3023
3024         // if we expected the journal to be there and we couldn't
3025         // create it or open it then we have to bail out.
3026         if (hfsmp->jnl == NULL) {
3027                 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);  // NOTE(review): retval is always 0 here (non-zero read results returned earlier), so it does not describe the journal failure
3028                 return EINVAL;
3029         }
3030
3031         return 0;
3032 }
3033
3034 /*
3035  * Calculate the allocation zone for metadata.
3036  *
3037  * This zone includes the following:
3038  *      Allocation Bitmap file
3039  *      Overflow Extents file
3040  *      Journal file
3041  *      Quota files
3042  *      Clustered Hot files
3043  *      Catalog file
3044  *
3045  *                          METADATA ALLOCATION ZONE
3046  * ____________________________________________________________________________
3047  * |    |    |     |               |                              |           |
3048  * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
3049  * |____|____|_____|_______________|______________________________|___________|
3050  *
3051  * <------------------------------- N * 128 MB ------------------------------->
3052  *
3053  */
3054 #define GIGABYTE  ((u_int64_t)(1024*1024*1024))  /* 64-bit value; fully parenthesized so it expands as one operand in any expression */
3055
3056 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
3057 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
3058 #define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
3059 #define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
3060 #define HOTBAND_MINIMUM_SIZE  (10*1024*1024)   /* HOTBAND_*: byte limits applied to the hot file area in hfs_metadatazone_init() */
3061 #define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
3062
3063 /* Initialize the metadata zone and hfs_min_alloc_start for a mount.
3064  *
3065  * The zone is disabled when the volume is smaller than 10 GB, when it
3066  * has no journal, or when disable is non-zero.  In that case a zone that
3067  * was enabled before has HFS_METADATA_ZONE cleared and its fields zeroed.
3068  * hfs_min_alloc_start is recomputed in either case.
3069  */
3070 void
3071 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
3072 {
3073         ExtendedVCB  *vcb;
3074         u_int64_t  fs_size;     /* volume size in bytes (blockSize * allocLimit) */
3075         u_int64_t  zonesize;    /* running size of the zone, in bytes */
3076         u_int64_t  temp;        /* zone size before round-up, then the bytes the round-up added */
3077         u_int64_t  filesize;    /* bytes for the hot file area plus quota files */
3078         u_int32_t  blk;
3079         int  items, really_do_it=1;
3080
3081         vcb = HFSTOVCB(hfsmp);
3082         fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3083
3084         /*
3085          * For volumes less than 10 GB, don't bother.
3086          */
3087         if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3088                 really_do_it = 0;
3089         }
3090
3091         /*
3092          * Skip non-journaled volumes as well.
3093          */
3094         if (hfsmp->jnl == NULL) {
3095                 really_do_it = 0;
3096         }
3097
3098         /* If caller wants to disable metadata zone, do it (any non-zero value counts) */
3099         if (disable) {
3100                 really_do_it = 0;
3101         }
3102
3103         /*
3104          * Start with space for the boot blocks and Volume Header.
3105          * 1536 = byte offset from start of volume to end of volume header:
3106          * 1024 bytes is the offset from the start of the volume to the
3107          * start of the volume header (defined by the volume format)
3108          * + 512 bytes (the size of the volume header).
3109          */
3110         zonesize = roundup(1536, hfsmp->blockSize);
3111
3112         /*
3113          * Add the on-disk size of allocation bitmap.
3114          */
3115         zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3116
3117         /*
3118          * Add space for the Journal Info Block and Journal (if they're in
3119          * this file system).
3120          */
3121         if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3122                 zonesize += hfsmp->blockSize + hfsmp->jnl_size;  /* one block for the info block + the journal itself */
3123         }
3124
3125         /*
3126          * Add the existing size of the Extents Overflow B-tree.
3127          * (It rarely grows, so don't bother reserving additional room for it.)
3128          */
3129         zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
3130
3131         /*
3132          * If there is an Attributes B-tree, leave room for 11 clumps worth.
3133          * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3134          * When installing a full OS install onto a 20GB volume, we use
3135          * 7 to 8 clumps worth of space (depending on packages), so that leaves
3136          * us with another 3 or 4 clumps worth before we need another extent.
3137          */
3138         if (hfsmp->hfs_attribute_cp) {
3139                 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3140         }
3141
3142         /*
3143          * Leave room for 11 clumps of the Catalog B-tree.
3144          * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3145          * When installing a full OS install onto a 20GB volume, we use
3146          * 7 to 8 clumps worth of space (depending on packages), so that leaves
3147          * us with another 3 or 4 clumps worth before we need another extent.
3148          */
3149         zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3150
3151         /*
3152          * Add space for hot file region.
3153          *
3154          * ...for now, use 5 MB per 1 GB (0.5 %)
3155          */
3156         filesize = (fs_size / 1024) * 5;
3157         if (filesize > HOTBAND_MAXIMUM_SIZE)
3158                 filesize = HOTBAND_MAXIMUM_SIZE;
3159         else if (filesize < HOTBAND_MINIMUM_SIZE)
3160                 filesize = HOTBAND_MINIMUM_SIZE;
3161         /*
3162          * Calculate user quota file requirements.
3163          */
3164         if (hfsmp->hfs_flags & HFS_QUOTAS) {
3165                 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3166                 if (items < QF_MIN_USERS)
3167                         items = QF_MIN_USERS;
3168                 else if (items > QF_MAX_USERS)
3169                         items = QF_MAX_USERS;
3170                 if (!powerof2(items)) {
3171                         int x = items;   /* round items up to the next power of two */
3172                         items = 4;
3173                         while (x>>1 != 1) {
3174                                 x = x >> 1;
3175                                 items = items << 1;
3176                         }
3177                 }
3178                 filesize += (items + 1) * sizeof(struct dqblk);
3179                 /*
3180                  * Calculate group quota file requirements.
3181                  *
3182                  */
3183                 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3184                 if (items < QF_MIN_GROUPS)
3185                         items = QF_MIN_GROUPS;
3186                 else if (items > QF_MAX_GROUPS)
3187                         items = QF_MAX_GROUPS;
3188                 if (!powerof2(items)) {
3189                         int x = items;   /* same power-of-two round-up as for users */
3190                         items = 4;
3191                         while (x>>1 != 1) {
3192                                 x = x >> 1;
3193                                 items = items << 1;
3194                         }
3195                 }
3196                 filesize += (items + 1) * sizeof(struct dqblk);
3197         }
3198         zonesize += filesize;
3199
3200         /*
3201          * Round up entire zone to a bitmap block's worth.
3202          * The extra space goes to the catalog file and hot file area.
3203          */
3204         temp = zonesize;
3205         zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3206         hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;  /* set even when the zone ends up disabled below */
3207         /*
3208          * If doing the round up for hfs_min_alloc_start would push us past
3209          * allocLimit, then just reset it back to 0.  Though using a value
3210          * bigger than allocLimit would not cause damage in the block allocator
3211          * code, this value could get stored in the volume header and make it out
3212          * to disk, making the volume header technically corrupt.
3213          */
3214         if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3215                 hfsmp->hfs_min_alloc_start = 0;
3216         }
3217
3218         if (really_do_it == 0) {
3219                 /* If metadata zone needs to be disabled because the
3220                  * volume was truncated, clear the bit and zero out
3221                  * the values that are no longer needed.
3222                  */
3223                 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3224                         /* Disable metadata zone */
3225                         hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3226
3227                         /* Zero out mount point values that are not required */
3228                         hfsmp->hfs_catalog_maxblks = 0;
3229                         hfsmp->hfs_hotfile_maxblks = 0;
3230                         hfsmp->hfs_hotfile_start = 0;
3231                         hfsmp->hfs_hotfile_end = 0;
3232                         hfsmp->hfs_hotfile_freeblks = 0;
3233                         hfsmp->hfs_metazone_start = 0;
3234                         hfsmp->hfs_metazone_end = 0;
3235                 }
3236
3237                 return;
3238         }
3239
3240         temp = zonesize - temp;  /* temp has extra space */
3241         filesize += temp / 3;    /* a third of the extra space goes to the hot file area... */
3242         hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;  /* ...and the rest is added to the catalog limit */
3243
3244         if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3245                 hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
3246         } else {
3247                 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3248         }
3249
3250         /* Convert to allocation blocks. */
3251         blk = zonesize / vcb->blockSize;
3252
3253         /* The default metadata zone location is at the start of volume. */
3254         hfsmp->hfs_metazone_start = 1;
3255         hfsmp->hfs_metazone_end = blk - 1;
3256
3257         /* The default hotfile area is at the end of the zone. */
3258         if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3259                 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3260                 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3261                 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3262         }
3263         else {
3264                 hfsmp->hfs_hotfile_start = 0;   /* only a root file system gets a hot file band here */
3265                 hfsmp->hfs_hotfile_end = 0;
3266                 hfsmp->hfs_hotfile_freeblks = 0;
3267         }
3268 #if DEBUG
3269         printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3270         printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3271         printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
3272 #endif
3273
3274         hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3275 }
3276
3277
3278 /*
3279  * Free blocks currently usable by the hot file band: the MetaZoneFreeBlocks()
3280  * count less the unused part of the extents overflow and catalog
3281  * reserves, floored at 0 and capped by hfs_hotfile_maxblks.
3282  */
3283 static u_int32_t
3284 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3285 {
3286         ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3287         int bitmap_lock, avail;
3288
3289         /*
3290          * This is only used at initialization time and on an ssd
3291          * we'll get the real info from the hotfile btree user info.
3292          */
3293         if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)
3294                 return 0;
3295
3296         /* Count the zone's free blocks with the bitmap locked exclusively. */
3297         bitmap_lock = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3298         avail = MetaZoneFreeBlocks(vcb);
3299         hfs_systemfile_unlock(hfsmp, bitmap_lock);
3300
3301         /* Minus Extents overflow file reserve. */
3302         if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks)
3303                 avail -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3304
3305         /* Minus catalog file reserve. */
3306         if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks)
3307                 avail -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3308
3309         if (avail < 0)
3310                 avail = 0;
3311
3312         return MIN(avail, hfsmp->hfs_hotfile_maxblks);
3313 }
3314
3315 /*
3316  * Determine if a file is a "virtual" metadata file: a root-folder entry whose
3317  * name is exactly one of the journal, quota or hot-file B-tree file names.
3318  */
3319 int
3320 hfs_virtualmetafile(struct cnode *cp)
3321 {
3322         static const struct { const char *name; size_t len; } meta_names[] = {
3323                 { ".journal",            sizeof(".journal") },
3324                 { ".journal_info_block", sizeof(".journal_info_block") },
3325                 { ".quota.user",         sizeof(".quota.user") },
3326                 { ".quota.group",        sizeof(".quota.group") },
3327                 { ".hotfiles.btree",     sizeof(".hotfiles.btree") },
3328         };
3329         const char *fname = (const char *)cp->c_desc.cd_nameptr;
3330         unsigned int idx;
3331
3332         if (cp->c_parentcnid != kHFSRootFolderID || fname == NULL)
3333                 return (0);
3334         /* Each len counts the terminating NUL, so only exact names match. */
3335         for (idx = 0; idx < sizeof(meta_names) / sizeof(meta_names[0]); idx++) {
3336                 if (strncmp(fname, meta_names[idx].name, meta_names[idx].len) == 0)
3337                         return (1);
3338         }
3339         return (0);
3340 }
3341
3342 /* Take the syncer lock, which is implemented as the mount lock (hfs_lock_mount). */
3343 __private_extern__ void hfs_syncer_lock(struct hfsmount *hfsmp)
3344 {
3345         hfs_lock_mount(hfsmp);
3346 }
3347
3348 /* Release the syncer lock taken by hfs_syncer_lock() (hfs_unlock_mount). */
3349 __private_extern__ void hfs_syncer_unlock(struct hfsmount *hfsmp)
3350 {
3351         hfs_unlock_mount(hfsmp);
3352 }
3353
3354 /* Sleep on hfs_sync_incomplete, passing hfs_mutex as msleep's mutex, until woken (see hfs_syncer_wakeup). */
3355 __private_extern__ void hfs_syncer_wait(struct hfsmount *hfsmp)
3356 {
3357         void *wait_chan = &hfsmp->hfs_sync_incomplete;
3358         msleep(wait_chan, &hfsmp->hfs_mutex, PWAIT, "hfs_syncer_wait", NULL);
3359 }
3360
3361 /* Wake threads sleeping on hfs_sync_incomplete (the channel hfs_syncer_wait() sleeps on). */
3362 __private_extern__ void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3363 {
3364         wakeup(&hfsmp->hfs_sync_incomplete);
3365 }
3366
3367 /* Turn a microsecond interval into a deadline via clock_interval_to_deadline(); NOTE(review): confirm that its interval parameter is 64 bits wide, otherwise large usecs values are truncated. */
3368 __private_extern__ uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3369 {
3370         uint64_t abs_deadline;
3371         clock_interval_to_deadline(usecs, NSEC_PER_USEC, &abs_deadline);
3372         return abs_deadline;
3373 }
3374
3375 /* Arm the syncer thread call with a deadline HFS_META_DELAY microseconds out. */
3376 __private_extern__ void hfs_syncer_queue(thread_call_t syncer)
3377 {
3378         const uint64_t fire_at = hfs_usecs_to_deadline(HFS_META_DELAY);
3379
3380         /* A non-zero result is reported as the call having been scheduled already. */
3381         if (!thread_call_enter_delayed_with_leeway(syncer, NULL, fire_at, 0,
3382                                                    THREAD_CALL_DELAY_SYS_BACKGROUND))
3383                 return;
3384         printf("hfs: syncer already scheduled!\n");
3385 }
3386
3387 /*
3388  * Fire off a timed callback to sync the disk if the volume is on
3389  * ejectable media (a no-op when the mount has no syncer).
3390  */
3391 __private_extern__ void
3392 hfs_sync_ejectable(struct hfsmount *hfsmp)
3393 {
3394         thread_call_t to_queue = NULL;
3395
3396         /* Nothing to do without a syncer, or when the syncer itself is calling. */
3397         if (hfsmp->hfs_syncer == NULL || current_thread() == hfsmp->hfs_syncer_thread)
3398                 return;
3399
3400         hfs_syncer_lock(hfsmp);
3401
3402         /* Stamp hfs_sync_req_oldest with the current uptime if it is not set yet. */
3403         if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3404                 microuptime(&hfsmp->hfs_sync_req_oldest);
3405
3406         /*
3407          * If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3408          * don't want to queue again if there is a sync outstanding.
3409          */
3410         if (hfsmp->hfs_syncer != NULL && !hfsmp->hfs_sync_incomplete) {
3411                 hfsmp->hfs_sync_incomplete = TRUE;
3412                 to_queue = hfsmp->hfs_syncer;
3413         }
3414         hfs_syncer_unlock(hfsmp);
3415
3416         if (to_queue != NULL)
3417                 hfs_syncer_queue(to_queue);
3418 }
3419 /* hfs_start_transaction: begin a (possibly nested) transaction on hfsmp, taking the global lock unless this thread already owns the journal or the global lock. */
3420 int   /* 0 on success; EBUSY, EROFS, or the error from journal_start_transaction() */
3421 hfs_start_transaction(struct hfsmount *hfsmp)
3422 {
3423         int ret = 0, unlock_on_err = 0;   /* unlock_on_err: this call took the global lock and must drop it on failure */
3424         thread_t thread = current_thread();
3425
3426 #ifdef HFS_CHECK_LOCK_ORDER
3427         /*
3428          * You cannot start a transaction while holding a system
3429          * file lock. (unless the transaction is nested.)
3430          */
3431         if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3432                 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3433                         panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3434                 }
3435                 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3436                         panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3437                 }
3438                 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3439                         panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3440                 }
3441         }
3442 #endif /* HFS_CHECK_LOCK_ORDER */
3443
3444 again:   /* re-entered when hfsmp->jnl turned out different once the global lock was held */
3445
3446         if (hfsmp->jnl) {
3447                 if (journal_owner(hfsmp->jnl) != thread) {   /* when this thread already owns the journal, no lock is taken here */
3448                         /*
3449                          * The global lock should be held shared if journal is
3450                          * active to prevent disabling.  If we're not the owner
3451                          * of the journal lock, verify that we're not already
3452                          * holding the global lock exclusive before moving on.
3453                          */
3454                         if (hfsmp->hfs_global_lockowner == thread) {
3455                                 ret = EBUSY;
3456                                 goto out;
3457                         }
3458
3459                         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3460
3461                         // Things could have changed
3462                         if (!hfsmp->jnl) {
3463                                 hfs_unlock_global(hfsmp);
3464                                 goto again;
3465                         }
3466
3467                         OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3468                         unlock_on_err = 1;
3469                 }
3470         } else {
3471                 // No journal
3472                 if (hfsmp->hfs_global_lockowner != thread) {   // skipped when this thread already holds the global lock
3473                         hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3474
3475                         // Things could have changed
3476                         if (hfsmp->jnl) {
3477                                 hfs_unlock_global(hfsmp);
3478                                 goto again;
3479                         }
3480
3481                         OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3482                         unlock_on_err = 1;
3483                 }
3484         }
3485
3486         /* If a downgrade to read-only mount is in progress, no other
3487          * thread than the downgrade thread is allowed to modify
3488          * the file system.
3489          */
3490         if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3491             hfsmp->hfs_downgrading_thread != thread) {
3492                 ret = EROFS;
3493                 goto out;
3494         }
3495
3496         if (hfsmp->jnl) {
3497                 ret = journal_start_transaction(hfsmp->jnl);
3498         } else {
3499                 ret = 0;
3500         }
3501
3502         if (ret == 0)
3503                 ++hfsmp->hfs_transaction_nesting;   /* counted only on success; hfs_end_transaction() decrements it */
3504
3505 out:
3506         if (ret != 0 && unlock_on_err) {   /* undo the lock and active-thread count taken above */
3507                 hfs_unlock_global (hfsmp);
3508                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3509         }
3510
3511     return ret;
3512 }
3513
3514 int
3515 hfs_end_transaction(struct hfsmount *hfsmp)
3516 {
3517     int ret;
3518
3519         assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
3520         assert(hfsmp->hfs_transaction_nesting > 0);
3521
3522         if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
3523                 hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
3524
3525         bool need_unlock = !--hfsmp->hfs_transaction_nesting;
3526
3527         if (hfsmp->jnl) {
3528                 ret = journal_end_transaction(hfsmp->jnl);
3529         } else {
3530                 ret = 0;
3531         }
3532
3533         if (need_unlock) {
3534                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3535                 hfs_unlock_global (hfsmp);
3536                 hfs_sync_ejectable(hfsmp);
3537         }
3538
3539     return ret;
3540 }
3541
3542
3543 void
3544 hfs_journal_lock(struct hfsmount *hfsmp)
3545 {
3546         /* Only peek at hfsmp->jnl while holding the global lock */
3547         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3548         if (hfsmp->jnl) {
3549                 journal_lock(hfsmp->jnl);
3550         }
3551         hfs_unlock_global (hfsmp);
3552 }
3553
3554 void
3555 hfs_journal_unlock(struct hfsmount *hfsmp)
3556 {
3557         /* Only peek at hfsmp->jnl while holding the global lock */
3558         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3559         if (hfsmp->jnl) {
3560                 journal_unlock(hfsmp->jnl);
3561         }
3562         hfs_unlock_global (hfsmp);
3563 }
3564
3565 /*
3566  * Flush the contents of the journal to the disk.
3567  *
3568  *  - HFS_FLUSH_JOURNAL
3569  *      Wait to write in-memory journal to the disk consistently.
3570  *      This means that the journal still contains uncommitted
3571  *      transactions and the file system metadata blocks in
3572  *      the journal transactions might be written asynchronously
3573  *      to the disk.  But there is no guarantee that they are
3574  *      written to the disk before returning to the caller.
3575  *      Note that this option is sufficient for file system
3576  *      data integrity as it guarantees consistent journal
3577  *      content on the disk.
3578  *
3579  *  - HFS_FLUSH_JOURNAL_META
3580  *      Wait to write in-memory journal to the disk
3581  *      consistently, and also wait to write all asynchronous
3582  *      metadata blocks to its corresponding locations
3583  *      consistently on the disk. This is overkill in normal
3584  *      scenarios but is useful whenever the metadata blocks
3585  *      are required to be consistent on-disk instead of
3586  *      just the journalbeing consistent; like before live
3587  *      verification and live volume resizing.  The update of the
3588  *      metadata doesn't include a barrier of track cache flush.
3589  *
3590  *  - HFS_FLUSH_FULL
3591  *      HFS_FLUSH_JOURNAL + force a track cache flush to media
3592  *
3593  *  - HFS_FLUSH_CACHE
3594  *      Force a track cache flush to media.
3595  *
3596  *  - HFS_FLUSH_BARRIER
3597  *      Barrier-only flush to ensure write order
3598  *
3599  */
3600 errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
3601 {
3602         errno_t error = 0;
3603         journal_flush_options_t options = 0;
3604         dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
3605
3606         switch (mode) {
3607                 case HFS_FLUSH_JOURNAL_META:
3608                         // wait for journal, metadata blocks and previous async flush to finish
3609                         SET(options, JOURNAL_WAIT_FOR_IO);
3610
3611                         // no break
3612
3613                 case HFS_FLUSH_JOURNAL:
3614                 case HFS_FLUSH_JOURNAL_BARRIER:
3615                 case HFS_FLUSH_FULL:
3616
3617                         if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
3618                             !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3619                                 mode = HFS_FLUSH_FULL;
3620
3621                         if (mode == HFS_FLUSH_FULL)
3622                                 SET(options, JOURNAL_FLUSH_FULL);
3623
3624                         /* Only peek at hfsmp->jnl while holding the global lock */
3625                         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3626
3627                         if (hfsmp->jnl)
3628                                 error = journal_flush(hfsmp->jnl, options);
3629
3630                         hfs_unlock_global (hfsmp);
3631
3632                         /*
3633                          * This may result in a double barrier as
3634                          * journal_flush may have issued a barrier itself
3635                          */
3636                         if (mode == HFS_FLUSH_JOURNAL_BARRIER)
3637                                 error = VNOP_IOCTL(hfsmp->hfs_devvp,
3638                                     DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3639                                     FWRITE, vfs_context_kernel());
3640
3641                         break;
3642
3643                 case HFS_FLUSH_CACHE:
3644                         // Do a full sync
3645                         sync_req.options = 0;
3646
3647                         // no break
3648
3649                 case HFS_FLUSH_BARRIER:
3650                         // If barrier only flush doesn't support, fall back to use full flush.
3651                         if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3652                                 sync_req.options = 0;
3653
3654                         error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3655                                            FWRITE, vfs_context_kernel());
3656                         break;
3657
3658                 default:
3659                         error = EINVAL;
3660         }
3661
3662         return error;
3663 }
3664
3665 /*
3666  * hfs_erase_unused_nodes
3667  *
3668  * Check wheter a volume may suffer from unused Catalog B-tree nodes that
3669  * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3670  * zeroes to the unused nodes.
3671  *
3672  * How do we detect when a volume needs this repair?  We can't always be
3673  * certain.  If a volume was created after a certain date, then it may have
3674  * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
3675  * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3676  * that means that the entire first clump must have been written to, which means
3677  * there shouldn't be unused and unwritten nodes in that first clump, and this
3678  * repair is not needed.
3679  *
3680  * We have defined a bit in the Volume Header's attributes to indicate when the
3681  * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3682  * As will fsck_hfs when it repairs the unused nodes.
3683  */
3684 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3685 {
3686         int result;
3687         struct filefork *catalog;
3688         int lockflags;
3689
3690         if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3691         {
3692                 /* This volume has already been checked and repaired. */
3693                 return 0;
3694         }
3695
3696         if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3697         {
3698                 /* This volume is too old to have had the problem. */
3699                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3700                 return 0;
3701         }
3702
3703         catalog = hfsmp->hfs_catalog_cp->c_datafork;
3704         if (catalog->ff_size > catalog->ff_clumpsize)
3705         {
3706                 /* The entire first clump must have been in use at some point. */
3707                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3708                 return 0;
3709         }
3710
3711         /*
3712          * If we get here, we need to zero out those unused nodes.
3713          *
3714          * We start a transaction and lock the catalog since we're going to be
3715          * making on-disk changes.  But note that BTZeroUnusedNodes doens't actually
3716          * do its writing via the journal, because that would be too much I/O
3717          * to fit in a transaction, and it's a pain to break it up into multiple
3718          * transactions.  (It behaves more like growing a B-tree would.)
3719          */
3720         printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3721         result = hfs_start_transaction(hfsmp);
3722         if (result)
3723                 goto done;
3724         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3725         result = BTZeroUnusedNodes(catalog);
3726         vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3727         hfs_systemfile_unlock(hfsmp, lockflags);
3728         hfs_end_transaction(hfsmp);
3729         if (result == 0)
3730                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3731         printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3732
3733 done:
3734         return result;
3735 }
3736
3737
3738 extern time_t snapshot_timestamp;
3739
3740 int
3741 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3742 {
3743         int snapshot_error = 0;
3744
3745         if (vp == NULL) {
3746                 return 0;
3747         }
3748
3749         /* Swap files are special; skip them */
3750         if (vnode_isswap(vp)) {
3751                 return 0;
3752         }
3753
3754         if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3755                 // the change time is within this epoch
3756                 int error;
3757
3758                 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3759                 if (error == EDEADLK) {
3760                         snapshot_error = 0;
3761                 } else if (error) {
3762                         if (error == EAGAIN) {
3763                                 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3764                         } else if (error == EINTR) {
3765                                 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3766                                 snapshot_error = EINTR;
3767                         }
3768                 }
3769         }
3770
3771         if (snapshot_error) return snapshot_error;
3772
3773         return 0;
3774 }
3775
3776 int
3777 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3778 {
3779         int error;
3780
3781         if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3782                 // there's nothing to do, it's not dataless
3783                 return 0;
3784         }
3785
3786         /* Swap files are special; ignore them */
3787         if (vnode_isswap(vp)) {
3788                 return 0;
3789         }
3790
3791         // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3792         error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3793         if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3794                 error = 0;
3795         } else if (error) {
3796                 if (error == EAGAIN) {
3797                         printf("hfs: dataless: timed out waiting for namespace handler...\n");
3798                         // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3799                         return 0;
3800                 } else if (error == EINTR) {
3801                         // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3802                         return EINTR;
3803                 }
3804         } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3805                 //
3806                 // if we're here, the dataless bit is still set on the file
3807                 // which means it didn't get handled.  we return an error
3808                 // but it's presently ignored by all callers of this function.
3809                 //
3810                 // XXXdbg - EDATANOTPRESENT is what we really need...
3811                 //
3812                 return EBADF;
3813         }
3814
3815         return error;
3816 }
3817
3818
3819 //
3820 // NOTE: this function takes care of starting a transaction and
3821 //       acquiring the systemfile lock so that it can call
3822 //       cat_update().
3823 //
3824 // NOTE: do NOT hold and cnode locks while calling this function
3825 //       to avoid deadlocks (because we take a lock on the root
3826 //       cnode)
3827 //
3828 int
3829 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3830 {
3831         struct vnode *rvp;
3832         struct cnode *cp;
3833         int error;
3834
3835         error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3836         if (error) {
3837                 return error;
3838         }
3839
3840         cp = VTOC(rvp);
3841         if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3842                 return error;
3843         }
3844         struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3845
3846         int lockflags;
3847         if ((error = hfs_start_transaction(hfsmp)) != 0) {
3848                 return error;
3849         }
3850         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3851
3852         if (extinfo->document_id == 0) {
3853                 // initialize this to start at 3 (one greater than the root-dir id)
3854                 extinfo->document_id = 3;
3855         }
3856
3857         *docid = extinfo->document_id++;
3858
3859         // mark the root cnode dirty
3860         cp->c_flag |= C_MODIFIED;
3861         hfs_update(cp->c_vp, 0);
3862
3863         hfs_systemfile_unlock (hfsmp, lockflags);
3864         (void) hfs_end_transaction(hfsmp);
3865
3866         (void) hfs_unlock(cp);
3867
3868         vnode_put(rvp);
3869         rvp = NULL;
3870
3871         return 0;
3872 }
3873
3874
3875 /*
3876  * Return information about number of file system allocation blocks
3877  * taken by metadata on a volume.
3878  *
3879  * This function populates struct hfsinfo_metadata with allocation blocks
3880  * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3881  * journal file, and sum of all of the above.
3882  */
3883 int
3884 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3885 {
3886         int lockflags = 0;
3887         int ret_lockflags = 0;
3888
3889         /* Zero out the output buffer */
3890         bzero(hinfo, sizeof(struct hfsinfo_metadata));
3891
3892         /*
3893          * Getting number of allocation blocks for all btrees
3894          * should be a quick operation, so we grab locks for
3895          * all of them at the same time
3896          */
3897         lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3898         ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3899         /*
3900          * Make sure that we were able to acquire all locks requested
3901          * to protect us against conditions like unmount in progress.
3902          */
3903         if ((lockflags & ret_lockflags) != lockflags) {
3904                 /* Release any locks that were acquired */
3905                 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3906                 return EPERM;
3907         }
3908
3909         /* Get information about all the btrees */
3910         hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3911         hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3912         hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3913         hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3914
3915         /* Done with btrees, give up the locks */
3916         hfs_systemfile_unlock(hfsmp, ret_lockflags);
3917
3918         /* Get information about journal file */
3919         hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3920
3921         /* Calculate total number of metadata blocks */
3922         hinfo->total = hinfo->extents + hinfo->catalog +
3923                         hinfo->allocation + hinfo->attribute +
3924                         hinfo->journal;
3925
3926         return 0;
3927 }
3928
3929 static int
3930 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3931 {
3932         vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3933
3934         return 0;
3935 }
3936
3937 __private_extern__
3938 int hfs_freeze(struct hfsmount *hfsmp)
3939 {
3940         // First make sure some other process isn't freezing
3941         hfs_lock_mount(hfsmp);
3942         while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3943                 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3944                                    PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3945                         hfs_unlock_mount(hfsmp);
3946                         return EINTR;
3947                 }
3948         }
3949
3950         // Stop new syncers from starting
3951         hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3952
3953         // Now wait for all syncers to finish
3954         while (hfsmp->hfs_syncers) {
3955                 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3956                            PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
3957                         hfs_thaw_locked(hfsmp);
3958                         hfs_unlock_mount(hfsmp);
3959                         return EINTR;
3960                 }
3961         }
3962         hfs_unlock_mount(hfsmp);
3963
3964         // flush things before we get started to try and prevent
3965         // dirty data from being paged out while we're frozen.
3966         // note: we can't do this once we're in the freezing state because
3967         // other threads will need to take the global lock
3968         vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
3969
3970         // Block everything in hfs_lock_global now
3971         hfs_lock_mount(hfsmp);
3972         hfsmp->hfs_freeze_state = HFS_FREEZING;
3973         hfsmp->hfs_freezing_thread = current_thread();
3974         hfs_unlock_mount(hfsmp);
3975
3976         /* Take the exclusive lock to flush out anything else that
3977            might have the global lock at the moment and also so we
3978            can flush the journal. */
3979         hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3980         journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
3981         hfs_unlock_global(hfsmp);
3982
3983         // don't need to iterate on all vnodes, we just need to
3984         // wait for writes to the system files and the device vnode
3985         //
3986         // Now that journal flush waits for all metadata blocks to
3987         // be written out, waiting for btree writes is probably no
3988         // longer required.
3989         if (HFSTOVCB(hfsmp)->extentsRefNum)
3990                 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
3991         if (HFSTOVCB(hfsmp)->catalogRefNum)
3992                 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
3993         if (HFSTOVCB(hfsmp)->allocationsRefNum)
3994                 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
3995         if (hfsmp->hfs_attribute_vp)
3996                 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
3997         vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
3998
3999         // We're done, mark frozen
4000         hfs_lock_mount(hfsmp);
4001         hfsmp->hfs_freeze_state  = HFS_FROZEN;
4002         hfsmp->hfs_freezing_proc = current_proc();
4003         hfs_unlock_mount(hfsmp);
4004
4005         return 0;
4006 }
4007
4008 __private_extern__
4009 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
4010 {
4011         hfs_lock_mount(hfsmp);
4012
4013         if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
4014                 hfs_unlock_mount(hfsmp);
4015                 return EINVAL;
4016         }
4017         if (process && hfsmp->hfs_freezing_proc != process) {
4018                 hfs_unlock_mount(hfsmp);
4019                 return EPERM;
4020         }
4021
4022         hfs_thaw_locked(hfsmp);
4023
4024         hfs_unlock_mount(hfsmp);
4025
4026         return 0;
4027 }
4028
4029 static void hfs_thaw_locked(struct hfsmount *hfsmp)
4030 {
4031         hfsmp->hfs_freezing_proc = NULL;
4032         hfsmp->hfs_freeze_state = HFS_THAWED;
4033
4034         wakeup(&hfsmp->hfs_freeze_state);
4035 }