/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vm.h>
#include <sys/buf.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <kern/clock.h>
#include <stdbool.h>
#include <miscfs/specfs/specdev.h>
#include <libkern/OSAtomic.h>
#include <IOKit/IOLib.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>
#include <kern/kalloc.h>

#include "hfs_iokit.h"
#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"
#include "hfs_cprotect.h"

#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

#define HFS_MOUNT_DEBUG 1

//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************
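/*
 * Illustrative sketch (not part of the original source): a client that needs
 * to interpret a multi-byte Finder-info field must byte-swap it explicitly,
 * e.g. with the SWAP_BE32 macro from hfs_endian.h:
 *
 *     u_int32_t bootDirID = SWAP_BE32(vcb->vcbFndrInfo[0]);
 *
 * The field chosen here (word 0, the blessed system-folder directory ID on
 * HFS+) is only an example; the kernel itself deliberately leaves the whole
 * area un-swapped, as noted above.
 */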

//*******************************************************************************
// Routine:	hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);
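    /*
     * (Illustrative note, not in the original source: masking with 0x01FF
     * tests the low 9 bits, so the check above accepts any non-zero multiple
     * of 512 -- e.g. 512, 1024, 1536 -- and rejects values such as 768.)
     */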

    /* don't mount a writable volume if it's dirty, it must be cleaned by fsck_hfs */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb->vcbSigWord      = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime       = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod        = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb         = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls        = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt        = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation  = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks     = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit      = vcb->totalBlocks;
    vcb->blockSize       = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz       = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt       = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID      = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks      = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp      = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt        = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs     = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt       = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt       = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;	/* Compensate for write of MDB on last flush */

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                                                     hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        /* only report the lookup if it actually failed */
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb);	// mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
//
// Sanity check Volume Header Block:
//		Input argument *vhp is a pointer to an HFSPlusVolumeHeader block that has
//		not been endian-swapped and represents the on-disk contents of this sector.
//		This routine will not change the endianness of the vhp block.
//
//*******************************************************************************
OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
{
    u_int16_t signature;
    u_int16_t hfs_version;
    u_int32_t blockSize;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else {
        /* Removed printf for invalid HFS+ signature because it gives
         * false error for UFS root volume
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }

    if (blockSize < hfsmp->hfs_logical_block_size) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                   blockSize, hfsmp->hfs_logical_block_size);
        }
        return (EINVAL);
    }
    return 0;
}

//*******************************************************************************
//	Routine:	hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
    off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
    if (retval)
        return retval;

    if (signature == kHFSXSigWord) {
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    }

    blockSize = SWAP_BE32(vhp->blockSize);
    /* don't mount a writable volume if it's dirty, it must be cleaned by fsck_hfs */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
                   hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /*
     * If the allocation block size is less than the physical block size,
     * the same data could be cached in two places and lead to corruption.
     *
     * HFS Plus reserves one allocation block for the Volume Header.
     * If the physical size is larger, then when we read the volume header,
     * we will also end up reading in the next allocation block(s).
     * If those other allocation block(s) is/are modified, and then the volume
     * header is modified, the write of the volume header's buffer will write
     * out the old contents of the other allocation blocks.
     *
     * We assume that the physical block size is the same as the logical block
     * size.  The physical block size value is used to round down the offsets
     * for reading and writing the primary and alternate volume headers.
     *
     * The same logic to ensure a good hfs_physical_block_size is also in
     * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
     * later are doing the I/Os using the same block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }
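    /*
     * (Illustrative note, not in the original source: with 512-byte
     * allocation blocks on a disk reporting a 4096-byte physical block
     * size, one physical read of the volume header would drag in seven
     * neighboring allocation blocks.  Falling back to the logical block
     * size here -- e.g. 4096 -> 512 with hfs_log_per_phys = 1 -- keeps
     * volume header I/O from overlapping, and later overwriting, those
     * neighboring blocks.)
     */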

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord    = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod      = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb       = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz     = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID    = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp    = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt      = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt     = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt     = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;		/* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;	/* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation  = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks     = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit      = vcb->totalBlocks;
    vcb->freeBlocks      = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize       = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                     hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and block size).
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when the volume is resized but the
     * partition is yet to be resized.  Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future.  Therefore we should update the
     * alternate volume header at two locations on the disk,
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                           (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
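    /*
     * (Illustrative worked example, not in the original source: with
     * 512-byte logical blocks, a partition of 2,000,000 logical blocks,
     * and a filesystem of 200,000 allocation blocks of 4096 bytes each
     * (= 1,600,000 logical blocks), spare_sectors is 400,000 -- far more
     * than the 8 logical blocks per allocation block -- so this is the
     * degenerate case: the partition AVH lands 1024 bytes before logical
     * block 2,000,000 while the FS AVH lands 1024 bytes before logical
     * block 1,600,000, with HFS_ALT_SECTOR() doing the "1024 bytes before
     * the end" arithmetic in sector units.)
     */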
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size     = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size = 0;
    cfork.cf_clump    = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks   = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks  = 0;
    cnattr.ca_blocks  = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
            SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
            SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }

    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));

    hfs_unlock(hfsmp->hfs_extents_cp);

    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
            SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
            SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));

    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    hfs_unlock(hfsmp->hfs_catalog_cp);

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
            SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
            SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        hfs_unlock(hfsmp->hfs_attribute_cp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize a vnode for the virtual attribute data file that spans
         * the entire file system space, for performing I/O to the attribute
         * btree.  We hold an iocount on the attrdata vnode for the entire
         * duration of the mount (similar to the btree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* reset retval to 0; we don't care about errors in volname conversion */
    retval = 0;

    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects.  TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    thread_t allocator_scanner;
    hfsmp->scan_var = 0;

    /* Take the HFS mount mutex and wait on scan_var */
    hfs_lock_mount (hfsmp);

    kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);

    /* Wait until it registers that it's got the appropriate locks (or that it is finished) */
    while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
        msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0);
    }

    hfs_unlock_mount(hfsmp);

    thread_deallocate (allocator_scanner);
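    /*
     * (Descriptive note, not in the original source: kernel_thread_start
     * returns a referenced thread_t, so the thread_deallocate above merely
     * drops our handle on the scanner thread.  The scan itself keeps
     * running and eventually posts HFS_ALLOCATOR_SCAN_COMPLETED on
     * scan_var, which the hotfile initialization further below waits for.)
     */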

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization.  This only
    // happens if a previous version of MacOS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
        && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
        && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find.  In that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                                             HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                                             hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
    {
        MarkVCBDirty( vcb );	// mark VCB dirty so it will be written
    }

    if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
        hfs_pin_fs_metadata(hfsmp);
    }
    /*
     * Distinguish 3 potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it, turn bit on, then good.
     */
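    /*
     * (Illustrative summary, not in the original source -- the decision
     * table implemented by the if/else blocks below:
     *
     *     MNT_CPROTECT    kHFSContentProtectionMask    result
     *     ------------    -------------------------    ------
     *     set             clear                        EINVAL (case 1)
     *     set             set                          mount with CP (case 2)
     *     clear           set                          set MNT_CPROTECT (case 3)
     *     clear           clear                        mount without CP
     * )
     */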
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the mount point support it ? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        struct cp_root_xattr *xattr = NULL;
        xattr = hfs_malloc(sizeof(*xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * Assuming EA is not present on the root means
         * this is an erase install or a very old FS
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if (!cp_is_supported_version(xattr->major_version)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_CURRENT_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }

        if (cperr) {
            hfs_free(xattr, sizeof(*xattr));
            retval = EPERM;
            goto ErrorExit;
        }

        /* If we got here, then the CP version is valid.  Set it in the mount point */
        hfsmp->hfs_running_cp_major_vers = xattr->major_version;
        printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
        hfsmp->cproot_flags = xattr->flags;
        hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
#if HFS_CONFIG_KEY_ROLL
        hfsmp->hfs_auto_roll_min_key_os_version = xattr->auto_roll_min_version;
        hfsmp->hfs_auto_roll_max_key_os_version = xattr->auto_roll_max_version;
#endif

        hfs_free(xattr, sizeof(*xattr));

        /*
         * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
         * Ensure that the boot-arg's value is valid for FILES (not directories),
         * since only files are actually protected for now.
         */

        PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
        }

#if HFS_TMPDBG
#if !SECURE_KERNEL
        PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
#endif
#endif

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            hfsmp->default_cp_class = PROTECTION_CLASS_C;
        }

#else
        /* If CONFIG_PROTECT not built, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
        ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
        //
        // Wait until the bitmap scan completes before we initialize the
        // hotfile area so that we do not run into any issues with the
        // bitmap being read while the hotfile code is initializing itself.
        // On some older/slower machines, without this interlock, the bitmap
        // would sometimes get corrupted at boot time.
        //
        hfs_lock_mount(hfsmp);
        while (!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
            (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
        }
        hfs_unlock_mount(hfsmp);

        /*
         * Note: at this point we are not allowed to fail the
         * mount operation because the HotFile init code
         * in hfs_recording_init() will lookup vnodes with
         * VNOP_LOOKUP() which hangs vnodes off the mount
         * (and if we were to fail, VFS is not prepared to
         * clean that up at this point).  Since HotFiles are
         * optional, this is not a big deal.
         */
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}

static int
_pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
{
    int err;

    err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    if (err == 0) {
        err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL);
        hfs_unlock(VTOC(vp));
    }

    return err;
}

void
hfs_pin_fs_metadata(struct hfsmount *hfsmp)
{
    ExtendedVCB *vcb;
    int err;

    vcb = HFSTOVCB(hfsmp);

    err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
    if (err != 0) {
        printf("hfs: failed to pin extents overflow file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
    if (err != 0) {
        printf("hfs: failed to pin catalog file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
    if (err != 0) {
        printf("hfs: failed to pin bitmap file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
    if (err != 0) {
        printf("hfs: failed to pin extended attr file %d\n", err);
    }

    hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1);
    hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1);

    if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
        // and hey, if we've got a journal, let's pin that too!
        hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize));
    }
}

/*
 * ReleaseMetaFileVNode
 *
 * vp	L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}

/*************************************************************
 *
 * Unmounts an HFS volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any).  This is done
     * after the vflush() during unmount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}

/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *	non-zero - overflow extents exist
 *	zero     - overflow extents do not exist
 */
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume.  Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}

static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary but it stops us from taking
     * the mount lock in most cases.  The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else.  To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}

/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 *
 * This function accepts a @flags parameter which indicates which
 * system file locks are required.  The value it returns should be
 * used in a subsequent call to hfs_systemfile_unlock.  The caller
 * should treat this value as opaque; it may or may not have a
 * relation to the @flags field that is passed in.  The *only*
 * guarantee that we make is that a value of zero means that no locks
 * were taken and that there is no need to call hfs_systemfile_unlock
 * (although it is harmless to do so).  Recursion is supported but
 * care must still be taken to ensure correct lock ordering.  Note
 * that requests for certain locks may cause other locks to also be
 * taken, including locks that are not possible to ask for via the
 * @flags parameter.
 */
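/*
 * (Illustrative usage sketch, not in the original source -- the intended
 * call pattern, treating the return value as an opaque token:
 *
 *     int lockflags;
 *
 *     lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE,
 *                                     HFS_EXCLUSIVE_LOCK);
 *     // ... operate on the catalog and attribute b-trees ...
 *     hfs_systemfile_unlock(hfsmp, lockflags);
 *
 * Passing the token back, rather than the original flag set, matters
 * because the lock call may have added SFL_EXTENTS and/or SFL_BITMAP
 * internally, as the code below shows.)
 */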
int
hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
{
    /*
     * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
     */
    if (flags & SFL_CATALOG) {
        if (hfsmp->hfs_catalog_cp
            && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
            if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
            }
            if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
            }
            if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
            }
#endif /* HFS_CHECK_LOCK_ORDER */

            (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the catalog file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_catalog_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_CATALOG;
        }
    }

    if (flags & SFL_ATTRIBUTE) {
        if (hfsmp->hfs_attribute_cp
            && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
            if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
            }
            if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
            }
#endif /* HFS_CHECK_LOCK_ORDER */

            (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the attribute file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_attribute_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_ATTRIBUTE;
        }
    }

    if (flags & SFL_STARTUP) {
        if (hfsmp->hfs_startup_cp
            && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
            if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
                panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
            }
#endif /* HFS_CHECK_LOCK_ORDER */

            (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the startup file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_startup_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_STARTUP;
        }
    }

    /*
     * To prevent locks being taken in the wrong order, the extent lock
     * gets a bitmap lock as well.
     */
    if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
        if (hfsmp->hfs_allocation_cp) {
            (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            /*
             * The bitmap lock is also grabbed when only the extent lock
             * was requested.  Set the bitmap lock bit in the lock
             * flags which callers will use during unlock.
             */
            flags |= SFL_BITMAP;
        } else {
            flags &= ~SFL_BITMAP;
        }
    }

    if (flags & SFL_EXTENTS) {
        /*
         * Since the extents btree lock is recursive we always
         * need exclusive access.
         */
        if (hfsmp->hfs_extents_cp) {
            (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

            if (vfs_isswapmount(hfsmp->hfs_mp)) {
                /*
                 * because we may need this lock on the pageout path (if a swapfile allocation
                 * spills into the extents overflow tree), we will grant the holder of this
                 * lock the privilege of dipping into the reserve free pool in order to prevent
                 * a deadlock from occurring if we need those pageouts to complete before we
                 * will make any new pages available on the free list... the deadlock can occur
                 * if this thread needs to allocate memory while this lock is held
                 */
                if (set_vm_privilege(TRUE) == FALSE) {
                    /*
                     * indicate that we need to drop vm_privilege
                     * when we unlock
                     */
                    flags |= SFL_VM_PRIV;
                }
            }
        } else {
            flags &= ~SFL_EXTENTS;
        }
    }

    return (flags);
}

1585 /*
1586 * unlock HFS system file(s).
1587 */
1588 void
1589 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1590 {
1591 if (!flags)
1592 return;
1593
1594 struct timeval tv;
1595 u_int32_t lastfsync;
1596 int numOfLockedBuffs;
1597
1598 if (hfsmp->jnl == NULL) {
1599 microuptime(&tv);
1600 lastfsync = tv.tv_sec;
1601 }
1602 if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1603 hfs_unlock(hfsmp->hfs_startup_cp);
1604 }
1605 if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1606 if (hfsmp->jnl == NULL) {
1607 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1608 numOfLockedBuffs = count_lock_queue();
1609 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1610 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1611 kMaxSecsForFsync))) {
1612 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1613 }
1614 }
1615 hfs_unlock(hfsmp->hfs_attribute_cp);
1616 }
1617 if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1618 if (hfsmp->jnl == NULL) {
1619 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1620 numOfLockedBuffs = count_lock_queue();
1621 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1622 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1623 kMaxSecsForFsync))) {
1624 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1625 }
1626 }
1627 hfs_unlock(hfsmp->hfs_catalog_cp);
1628 }
1629 if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1630 hfs_unlock(hfsmp->hfs_allocation_cp);
1631 }
1632 if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1633 if (hfsmp->jnl == NULL) {
1634 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1635 numOfLockedBuffs = count_lock_queue();
1636 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1637 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1638 kMaxSecsForFsync))) {
1639 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1640 }
1641 }
1642 hfs_unlock(hfsmp->hfs_extents_cp);
1643
1644 if (flags & SFL_VM_PRIV) {
1645 /*
1646 * revoke the vm_privilege we granted this thread
1647 * now that we have unlocked the overflow extents
1648 */
1649 set_vm_privilege(FALSE);
1650 }
1651 }
1652 }
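
/*
 * Illustrative sketch only (the canonical lock/unlock pairing). Callers
 * must hand the flags *returned* by hfs_systemfile_lock() back to
 * hfs_systemfile_unlock(), since locking may add implied locks (e.g.
 * SFL_EXTENTS or SFL_BITMAP) to the set actually taken:
 *
 *    int lockflags;
 *
 *    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 *    error = cat_lookup(hfsmp, &desc, 0, 0, NULL, &attr, &fork, NULL);
 *    hfs_systemfile_unlock(hfsmp, lockflags);
 */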
1653
1654
1655 /*
1656 * RequireFileLock
1657 *
1658 * Check to see if a vnode is locked in the current context.
1659 * This is to be used for debugging purposes only!
1660 */
1661 #if DEBUG
1662 void RequireFileLock(FileReference vp, int shareable)
1663 {
1664 int locked;
1665
1666 /* The extents btree and allocation bitmap are always exclusive. */
1667 if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1668 VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1669 shareable = 0;
1670 }
1671
1672 locked = VTOC(vp)->c_lockowner == current_thread();
1673
1674 if (!locked && !shareable) {
1675 switch (VTOC(vp)->c_fileid) {
1676 case kHFSExtentsFileID:
1677 panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1678 break;
1679 case kHFSCatalogFileID:
1680 panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1681 break;
1682 case kHFSAllocationFileID:
1683 /* The allocation file can hide behind the journal lock. */
1684 if (VTOHFS(vp)->jnl == NULL)
1685 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1686 break;
1687 case kHFSStartupFileID:
1688 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1689 case kHFSAttributesFileID:
1690 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1691 break;
1692 }
1693 }
1694 }
1695 #endif // DEBUG
1696
1697
1698 /*
1699 * There are three ways to qualify for ownership rights on an object:
1700 *
1701 * 1. (a) Your UID matches the cnode's UID.
1702 * (b) The object in question is owned by "unknown"
1703 * 2. (a) Permissions on the filesystem are being ignored and
1704 * your UID matches the replacement UID.
1705 * (b) Permissions on the filesystem are being ignored and
1706 * the replacement UID is "unknown".
1707 * 3. You are root.
1708 *
1709 */
1710 int
1711 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1712 __unused struct proc *p, int invokesuperuserstatus)
1713 {
1714 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1715 (cnode_uid == UNKNOWNUID) || /* [1b] */
1716 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1717 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1718 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1719 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1720 return (0);
1721 } else {
1722 return (EPERM);
1723 }
1724 }
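
/*
 * Illustrative sketch only: a typical caller gates an operation on
 * ownership, passing invokesuperuserstatus so that rule [3] lets root
 * through ("cp" and "cred" here are hypothetical locals):
 *
 *    if ((error = hfs_owner_rights(hfsmp, cp->c_uid, cred, p, 1)) != 0)
 *        return (error);
 */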
1725
1726
1727 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1728 u_int32_t blockSizeLimit,
1729 u_int32_t baseMultiple) {
1730 /*
1731 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is no larger
1732 than the specified limit but still an even multiple of the baseMultiple.
1733 */
1734 int baseBlockCount, blockCount;
1735 u_int32_t trialBlockSize;
1736
1737 if (allocationBlockSize % baseMultiple != 0) {
1738 /*
1739 Whoops: the allocation block size isn't an even multiple of the specified base,
1740 so no way of dividing it into equal parts will yield a multiple either.
1741 */
1742 return 512; /* Hope for the best */
1743 };
1744
1745 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1746 from being handled as two 6K logical blocks instead of three 4K logical blocks.
1747 Even though the former (the result of the loop below) is the larger logical
1748 block size, the latter is more efficient: */
1749 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1750
1751 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1752 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1753
1754 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1755 trialBlockSize = blockCount * baseMultiple;
1756 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1757 if ((trialBlockSize <= blockSizeLimit) &&
1758 (trialBlockSize % baseMultiple == 0)) {
1759 return trialBlockSize;
1760 };
1761 };
1762 };
1763
1764 /* Note: we should never get here, since blockCount = 1 should always work,
1765 but this is nice and safe and makes the compiler happy, too ... */
1766 return 512;
1767 }
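
/*
 * Worked example (a sketch, assuming 4K pages and baseMultiple == 512):
 * BestBlockSizeFit(12288, MAXBSIZE, 512) returns PAGE_SIZE (4096) via the
 * early check, since 12288 % 4096 == 0. An allocation block size of 6144
 * falls through to the loop instead; its first trial size (6144) divides
 * evenly and is within the limit, so 6144 is returned.
 */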
1768
1769
1770 u_int32_t
1771 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1772 struct cat_attr *fattr, struct cat_fork *forkinfo)
1773 {
1774 struct hfsmount * hfsmp;
1775 struct cat_desc jdesc;
1776 int lockflags;
1777 int error;
1778
1779 if (vcb->vcbSigWord != kHFSPlusSigWord)
1780 return (0);
1781
1782 hfsmp = VCBTOHFS(vcb);
1783
1784 memset(&jdesc, 0, sizeof(struct cat_desc));
1785 jdesc.cd_parentcnid = kRootDirID;
1786 jdesc.cd_nameptr = (const u_int8_t *)name;
1787 jdesc.cd_namelen = strlen(name);
1788
1789 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1790 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1791 hfs_systemfile_unlock(hfsmp, lockflags);
1792
1793 if (error == 0) {
1794 return (fattr->ca_fileid);
1795 }
1796
1797 return (0); /* XXX what callers expect on an error */
1800 }
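
/*
 * For reference, hfs_late_journal_init() below uses this helper to
 * re-locate the journal files by name in the root directory, e.g.:
 *
 *    fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
 */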
1801
1802
1803 /*
1804 * On HFS Plus volumes, there can be orphaned files or directories:
1805 * files or directories that were unlinked while still busy.
1806 * If the volume was not cleanly unmounted then some of these may
1807 * have persisted and need to be removed.
1808 */
1809 void
1810 hfs_remove_orphans(struct hfsmount * hfsmp)
1811 {
1812 struct BTreeIterator * iterator = NULL;
1813 struct FSBufferDescriptor btdata;
1814 struct HFSPlusCatalogFile filerec;
1815 struct HFSPlusCatalogKey * keyp;
1816 struct proc *p = current_proc();
1817 FCB *fcb;
1818 ExtendedVCB *vcb;
1819 char filename[32];
1820 char tempname[32];
1821 size_t namelen;
1822 cat_cookie_t cookie;
1823 int catlock = 0;
1824 int catreserve = 0;
1825 bool started_tr = false;
1826 int lockflags;
1827 int result;
1828 int orphaned_files = 0;
1829 int orphaned_dirs = 0;
1830
1831 bzero(&cookie, sizeof(cookie));
1832
1833 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1834 return;
1835
1836 vcb = HFSTOVCB(hfsmp);
1837 fcb = VTOF(hfsmp->hfs_catalog_vp);
1838
1839 btdata.bufferAddress = &filerec;
1840 btdata.itemSize = sizeof(filerec);
1841 btdata.itemCount = 1;
1842
1843 iterator = hfs_mallocz(sizeof(*iterator));
1844
1845 /* Build a key to "temp" */
1846 keyp = (HFSPlusCatalogKey*)&iterator->key;
1847 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1848 keyp->nodeName.length = 4; /* "temp" */
1849 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1850 keyp->nodeName.unicode[0] = 't';
1851 keyp->nodeName.unicode[1] = 'e';
1852 keyp->nodeName.unicode[2] = 'm';
1853 keyp->nodeName.unicode[3] = 'p';
1854
1855 /*
1856 * Position the iterator just before the first real temp file/dir.
1857 */
1858 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1859 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1860 hfs_systemfile_unlock(hfsmp, lockflags);
1861
1862 /* Visit all the temp files/dirs in the HFS+ private directory. */
1863 for (;;) {
1864 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1865 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1866 hfs_systemfile_unlock(hfsmp, lockflags);
1867 if (result)
1868 break;
1869 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1870 break;
1871
1872 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1873 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1874
1875 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1876 HFS_DELETE_PREFIX, filerec.fileID);
1877
1878 /*
1879 * Delete all files (and directories) named "tempxxx",
1880 * where xxx is the file's cnid in decimal.
1881 *
1882 */
1883 if (bcmp(tempname, filename, namelen + 1) != 0)
1884 continue;
1885
1886 struct filefork dfork;
1887 struct filefork rfork;
1888 struct cnode cnode;
1889 int mode = 0;
1890
1891 bzero(&dfork, sizeof(dfork));
1892 bzero(&rfork, sizeof(rfork));
1893 bzero(&cnode, sizeof(cnode));
1894
1895 if (hfs_start_transaction(hfsmp) != 0) {
1896 printf("hfs_remove_orphans: failed to start transaction\n");
1897 goto exit;
1898 }
1899 started_tr = true;
1900
1901 /*
1902 * Reserve some space in the Catalog file.
1903 */
1904 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1905 printf("hfs_remove_orphans: cat_preflight failed\n");
1906 goto exit;
1907 }
1908 catreserve = 1;
1909
1910 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1911 catlock = 1;
1912
1913 /* Build a fake cnode */
1914 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1915 &dfork.ff_data, &rfork.ff_data);
1916 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1917 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1918 cnode.c_desc.cd_namelen = namelen;
1919 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1920 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1921
1922 /* Position iterator at previous entry */
1923 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1924 NULL, NULL) != 0) {
1925 break;
1926 }
1927
1928 /* Truncate the file to zero (both forks) */
1929 if (dfork.ff_blocks > 0) {
1930 u_int64_t fsize;
1931
1932 dfork.ff_cp = &cnode;
1933 cnode.c_datafork = &dfork;
1934 cnode.c_rsrcfork = NULL;
1935 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1936 while (fsize > 0) {
1937 if (fsize > HFS_BIGFILE_SIZE) {
1938 fsize -= HFS_BIGFILE_SIZE;
1939 } else {
1940 fsize = 0;
1941 }
1942
1943 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1944 cnode.c_attr.ca_fileid, false) != 0) {
1945 printf("hfs: error truncating data fork!\n");
1946 break;
1947 }
1948
1949 //
1950 // if we're iteratively truncating this file down,
1951 // then end the transaction and start a new one so
1952 // that no one transaction gets too big.
1953 //
1954 if (fsize > 0) {
1955 /* Drop system file locks before starting
1956 * another transaction to preserve lock order.
1957 */
1958 hfs_systemfile_unlock(hfsmp, lockflags);
1959 catlock = 0;
1960 hfs_end_transaction(hfsmp);
1961
1962 if (hfs_start_transaction(hfsmp) != 0) {
1963 started_tr = false;
1964 goto exit;
1965 }
1966 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1967 catlock = 1;
1968 }
1969 }
1970 }
1971
1972 if (rfork.ff_blocks > 0) {
1973 rfork.ff_cp = &cnode;
1974 cnode.c_datafork = NULL;
1975 cnode.c_rsrcfork = &rfork;
1976 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1977 printf("hfs: error truncating rsrc fork!\n");
1978 break;
1979 }
1980 }
1981
1982 // Deal with extended attributes
1983 if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
1984 // hfs_removeallattr uses its own transactions
1985 hfs_systemfile_unlock(hfsmp, lockflags);
1986 catlock = false;
1987 hfs_end_transaction(hfsmp);
1988
1989 hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);
1990
1991 if (!started_tr) {
1992 if (hfs_start_transaction(hfsmp) != 0) {
1993 printf("hfs_remove_orphans: failed to start transaction\n");
1994 goto exit;
1995 }
1996 started_tr = true;
1997 }
1998
1999 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2000 catlock = 1;
2001 }
2002
2003 /* Remove the file or folder record from the Catalog */
2004 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
2005 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
2006 hfs_systemfile_unlock(hfsmp, lockflags);
2007 catlock = 0;
2008 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2009 break;
2010 }
2011
2012 mode = cnode.c_attr.ca_mode & S_IFMT;
2013
2014 if (mode == S_IFDIR) {
2015 orphaned_dirs++;
2016 }
2017 else {
2018 orphaned_files++;
2019 }
2020
2021 /* Update parent and volume counts */
2022 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
2023 if (mode == S_IFDIR) {
2024 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
2025 }
2026
2027 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
2028 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
2029
2030 /* Drop locks and end the transaction */
2031 hfs_systemfile_unlock(hfsmp, lockflags);
2032 cat_postflight(hfsmp, &cookie, p);
2033 catlock = catreserve = 0;
2034
2035 /*
2036 Now that Catalog is unlocked, update the volume info, making
2037 sure to differentiate between files and directories.
2038 */
2039 if (mode == S_IFDIR) {
2040 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
2041 }
2042 else {
2043 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
2044 }
2045
2046 hfs_end_transaction(hfsmp);
2047 started_tr = false;
2048 } /* end for */
2049
2050 exit:
2051
2052 if (orphaned_files > 0 || orphaned_dirs > 0)
2053 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
2054 if (catlock) {
2055 hfs_systemfile_unlock(hfsmp, lockflags);
2056 }
2057 if (catreserve) {
2058 cat_postflight(hfsmp, &cookie, p);
2059 }
2060 if (started_tr) {
2061 hfs_end_transaction(hfsmp);
2062 }
2063
2064 hfs_free(iterator, sizeof(*iterator));
2065 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
2066 }
2067
2068
2069 /*
2070 * This will return the correct logical block size for a given vnode.
2071 * For most files, it is the allocation block size; for metadata like
2072 * the B-trees, it is the B-tree's private nodeSize.
2073 */
2074 u_int32_t
2075 GetLogicalBlockSize(struct vnode *vp)
2076 {
2077 u_int32_t logBlockSize;
2078
2079 hfs_assert(vp != NULL);
2080
2081 /* start with default */
2082 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
2083
2084 if (vnode_issystem(vp)) {
2085 if (VTOF(vp)->fcbBTCBPtr != NULL) {
2086 BTreeInfoRec bTreeInfo;
2087
2088 /*
2089 * We do not lock the B-trees: if we are fetching a block, the tree
2090 * should already be locked by the caller.
2091 * In any case we only want the nodeSize, which never changes once
2092 * the tree is open, so even if the world is changing there is
2093 * nothing here to protect.
2094 */
2095
2096 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
2097
2098 logBlockSize = bTreeInfo.nodeSize;
2099
2100 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2101 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2102 }
2103 }
2104
2105 hfs_assert(logBlockSize > 0);
2106
2107 return logBlockSize;
2108 }
2109
2110 #if HFS_SPARSE_DEV
2111 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
2112 {
2113 struct vfsstatfs *vfsp; /* 272 bytes */
2114 uint64_t vfreeblks;
2115 struct timeval now;
2116
2117 hfs_lock_mount(hfsmp);
2118
2119 vnode_t backing_vp = hfsmp->hfs_backingvp;
2120 if (!backing_vp) {
2121 hfs_unlock_mount(hfsmp);
2122 return false;
2123 }
2124
2125 // usecount is not enough; we need iocount
2126 if (vnode_get(backing_vp)) {
2127 hfs_unlock_mount(hfsmp);
2128 *pfree_blks = 0;
2129 return true;
2130 }
2131
2132 uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2133 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
2134 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
2135
2136 hfs_unlock_mount(hfsmp);
2137
2138 mount_t backingfs_mp = vnode_mount(backing_vp);
2139
2140 microtime(&now);
2141 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
2142 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
2143 hfsmp->hfs_last_backingstatfs = now.tv_sec;
2144 }
2145
2146 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2147 vnode_put(backing_vp);
2148 return false;
2149 }
2150
2151 vfreeblks = vfsp->f_bavail;
2152 /* Normalize block count if needed. */
2153 if (vfsp->f_bsize != hfsmp->blockSize)
2154 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
2155 if (vfreeblks > bandblks)
2156 vfreeblks -= bandblks;
2157 else
2158 vfreeblks = 0;
2159
2160 /*
2161 * Take into account any delayed allocations. It is not
2162 * certain what the original reason for the "2 *" is. Most
2163 * likely it is to allow for additional requirements in the
2164 * host file system and metadata required by disk images. The
2165 * number of loaned blocks is likely to be small and we will
2166 * stop using them as we get close to the limit.
2167 */
2168 loanedblks = 2 * loanedblks;
2169 if (vfreeblks > loanedblks)
2170 vfreeblks -= loanedblks;
2171 else
2172 vfreeblks = 0;
2173
2174 if (maxblks)
2175 vfreeblks = MIN(vfreeblks, maxblks);
2176
2177 vnode_put(backing_vp);
2178
2179 *pfree_blks = vfreeblks;
2180
2181 return true;
2182 }
2183 #endif
2184
2185 u_int32_t
2186 hfs_free_cnids(struct hfsmount * hfsmp)
2187 {
2188 return HFS_MAX_FILES - hfsmp->hfs_filecount - hfsmp->hfs_dircount;
2189 }
2190
2191 u_int32_t
2192 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2193 {
2194 u_int32_t freeblks;
2195 u_int32_t rsrvblks;
2196 u_int32_t loanblks;
2197
2198 /*
2199 * We don't bother taking the mount lock
2200 * to look at these values since the values
2201 * themselves are each updated atomically
2202 * on aligned addresses.
2203 */
2204 freeblks = hfsmp->freeBlocks;
2205 rsrvblks = hfsmp->reserveBlocks;
2206 loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2207 if (wantreserve) {
2208 if (freeblks > rsrvblks)
2209 freeblks -= rsrvblks;
2210 else
2211 freeblks = 0;
2212 }
2213 if (freeblks > loanblks)
2214 freeblks -= loanblks;
2215 else
2216 freeblks = 0;
2217
2218 #if HFS_SPARSE_DEV
2219 /*
2220 * When the underlying device is sparse, check the
2221 * available space on the backing store volume.
2222 */
2223 uint64_t vfreeblks;
2224 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2225 freeblks = MIN(freeblks, vfreeblks);
2226 #endif /* HFS_SPARSE_DEV */
2227
2228 return (freeblks);
2229 }
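
/*
 * Worked example (a sketch with made-up counts): with freeBlocks = 1000,
 * reserveBlocks = 100, and loanedBlocks + lockedBlocks = 50,
 * hfs_freeblks(hfsmp, 1) reports 1000 - 100 - 50 = 850 blocks (clamping
 * to zero at each subtraction), while hfs_freeblks(hfsmp, 0) skips the
 * reserve and reports 950.
 */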
2230
2231 /*
2232 * Map HFS Common errors (negative) to BSD error codes (positive).
2233 * Positive errors (i.e., BSD errors) are passed through unchanged.
2234 */
2235 short MacToVFSError(OSErr err)
2236 {
2237 if (err >= 0)
2238 return err;
2239
2240 /* BSD/VFS internal errnos */
2241 switch (err) {
2242 case HFS_ERESERVEDNAME: /* -8 */
2243 return err;
2244 }
2245
2246 switch (err) {
2247 case dskFulErr: /* -34 */
2248 case btNoSpaceAvail: /* -32733 */
2249 return ENOSPC;
2250 case fxOvFlErr: /* -32750 */
2251 return EOVERFLOW;
2252
2253 case btBadNode: /* -32731 */
2254 return EIO;
2255
2256 case memFullErr: /* -108 */
2257 return ENOMEM; /* +12 */
2258
2259 case cmExists: /* -32718 */
2260 case btExists: /* -32734 */
2261 return EEXIST; /* +17 */
2262
2263 case cmNotFound: /* -32719 */
2264 case btNotFound: /* -32735 */
2265 return ENOENT; /* +2 */
2266
2267 case cmNotEmpty: /* -32717 */
2268 return ENOTEMPTY; /* 66 */
2269
2270 case cmFThdDirErr: /* -32714 */
2271 return EISDIR; /* 21 */
2272
2273 case fxRangeErr: /* -32751 */
2274 return ERANGE;
2275
2276 case bdNamErr: /* -37 */
2277 return ENAMETOOLONG; /* 63 */
2278
2279 case paramErr: /* -50 */
2280 case fileBoundsErr: /* -1309 */
2281 return EINVAL; /* +22 */
2282
2283 case fsBTBadNodeSize:
2284 return ENXIO;
2285
2286 default:
2287 return EIO; /* +5 */
2288 }
2289 }
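
/*
 * For example, MacToVFSError(btNotFound) yields ENOENT, while an
 * already-positive errno such as EROFS passes through unchanged.
 */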
2290
2291
2292 /*
2293 * Find the current thread's directory hint for a given index.
2294 *
2295 * Requires an exclusive lock on directory cnode.
2296 *
2297 * Use detach if the cnode lock must be dropped while the hint is still active.
2298 */
2299 directoryhint_t *
2300 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2301 {
2302 struct timeval tv;
2303 directoryhint_t *hint;
2304 boolean_t need_remove, need_init;
2305 const u_int8_t * name;
2306
2307 microuptime(&tv);
2308
2309 /*
2310 * Look for an existing hint first. If not found, create a new one (when
2311 * the list is not full) or recycle the oldest hint. Since new hints are
2312 * always added to the head of the list, the last hint is always the
2313 * oldest.
2314 */
2315 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2316 if (hint->dh_index == index)
2317 break;
2318 }
2319 if (hint != NULL) { /* found an existing hint */
2320 need_init = false;
2321 need_remove = true;
2322 } else { /* cannot find an existing hint */
2323 need_init = true;
2324 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2325 /* Create a default directory hint */
2326 hint = hfs_malloc(sizeof(directoryhint_t));
2327 ++dcp->c_dirhintcnt;
2328 need_remove = false;
2329 } else { /* recycle the last (i.e., the oldest) hint */
2330 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2331 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2332 (name = hint->dh_desc.cd_nameptr)) {
2333 hint->dh_desc.cd_nameptr = NULL;
2334 hint->dh_desc.cd_namelen = 0;
2335 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2336 vfs_removename((const char *)name);
2337 }
2338 need_remove = true;
2339 }
2340 }
2341
2342 if (need_remove)
2343 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2344
2345 if (detach)
2346 --dcp->c_dirhintcnt;
2347 else
2348 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2349
2350 if (need_init) {
2351 hint->dh_index = index;
2352 hint->dh_desc.cd_flags = 0;
2353 hint->dh_desc.cd_encoding = 0;
2354 hint->dh_desc.cd_namelen = 0;
2355 hint->dh_desc.cd_nameptr = NULL;
2356 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2357 hint->dh_desc.cd_hint = dcp->c_childhint;
2358 hint->dh_desc.cd_cnid = 0;
2359 }
2360 hint->dh_time = tv.tv_sec;
2361 return (hint);
2362 }
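
/*
 * Illustrative sketch only: a caller that must drop the directory cnode
 * lock while the hint is live detaches it, then reattaches it with
 * hfs_insertdirhint() (see below) once the lock is retaken:
 *
 *    hint = hfs_getdirhint(dcp, index, 1);    // detach
 *    hfs_unlock(dcp);
 *    ... do work without the cnode lock ...
 *    hfs_lock(dcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 *    hfs_insertdirhint(dcp, hint);            // reattach
 */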
2363
2364 /*
2365 * Release a single directory hint.
2366 *
2367 * Requires an exclusive lock on directory cnode.
2368 */
2369 void
2370 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2371 {
2372 const u_int8_t * name;
2373 directoryhint_t *hint;
2374
2375 /* Check if item is on list (could be detached) */
2376 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2377 if (hint == relhint) {
2378 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2379 --dcp->c_dirhintcnt;
2380 break;
2381 }
2382 }
2383 name = relhint->dh_desc.cd_nameptr;
2384 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2385 relhint->dh_desc.cd_nameptr = NULL;
2386 relhint->dh_desc.cd_namelen = 0;
2387 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2388 vfs_removename((const char *)name);
2389 }
2390 hfs_free(relhint, sizeof(*relhint));
2391 }
2392
2393 /*
2394 * Release directory hints for given directory
2395 *
2396 * Requires an exclusive lock on directory cnode.
2397 */
2398 void
2399 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2400 {
2401 struct timeval tv;
2402 directoryhint_t *hint, *prev;
2403 const u_int8_t * name;
2404
2405 if (stale_hints_only)
2406 microuptime(&tv);
2407
2408 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2409 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2410 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2411 break; /* stop here if this entry is too new */
2412 name = hint->dh_desc.cd_nameptr;
2413 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2414 hint->dh_desc.cd_nameptr = NULL;
2415 hint->dh_desc.cd_namelen = 0;
2416 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2417 vfs_removename((const char *)name);
2418 }
2419 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before freeing this node */
2420 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2421 hfs_free(hint, sizeof(*hint));
2422 --dcp->c_dirhintcnt;
2423 }
2424 }
2425
2426 /*
2427 * Insert a detached directory hint back into the list of dirhints.
2428 *
2429 * Requires an exclusive lock on directory cnode.
2430 */
2431 void
2432 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2433 {
2434 directoryhint_t *test;
2435
2436 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2437 if (test == hint)
2438 panic("hfs_insertdirhint: hint %p already on list!", hint);
2439 }
2440
2441 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2442 ++dcp->c_dirhintcnt;
2443 }
2444
2445 /*
2446 * Perform a case-insensitive compare of two UTF-8 filenames.
2447 *
2448 * Returns 0 if the strings match.
2449 */
2450 int
2451 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2452 {
2453 u_int16_t *ustr1, *ustr2;
2454 size_t ulen1, ulen2;
2455 size_t maxbytes;
2456 int cmp = -1;
2457
2458 if (len1 != len2)
2459 return (cmp);
2460
2461 maxbytes = kHFSPlusMaxFileNameChars << 1;
2462 ustr1 = hfs_malloc(maxbytes << 1);
2463 ustr2 = ustr1 + (maxbytes >> 1);
2464
2465 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2466 goto out;
2467 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2468 goto out;
2469
2470 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2471 out:
2472 hfs_free(ustr1, maxbytes << 1);
2473 return (cmp);
2474 }
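
/*
 * For example, since HFS+ name comparison is case-insensitive,
 * hfs_namecmp((const u_int8_t *)"README", 6, (const u_int8_t *)"readme", 6)
 * returns 0; names of differing byte length compare unequal immediately.
 */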
2475
2476 typedef struct jopen_cb_info {
2477 mount_t mp;
2478 off_t jsize;
2479 char *desired_uuid;
2480 struct vnode *jvp;
2481 size_t blksize;
2482 int need_clean;
2483 int need_init;
2484 } jopen_cb_info;
2485
2486 static int
2487 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2488 {
2489 jopen_cb_info *ji = (jopen_cb_info *)arg;
2490 char bsd_name[256];
2491 int error;
2492
2493 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2494 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2495
2496 if ((error = vnode_lookup(bsd_name, VNODE_LOOKUP_NOFOLLOW, &ji->jvp,
2497 vfs_context_kernel()))) {
2498 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2499 return 1; // keep iterating
2500 }
2501
2502 struct vnop_open_args oargs = {
2503 .a_vp = ji->jvp,
2504 .a_mode = FREAD | FWRITE,
2505 .a_context = vfs_context_kernel(),
2506 };
2507
2508 if (spec_open(&oargs)) {
2509 vnode_put(ji->jvp);
2510 ji->jvp = NULL;
2511 return 1;
2512 }
2513
2514 // if the journal is dirty and we didn't specify a desired
2515 // journal device uuid, then do not use the journal. but
2516 // if the journal is just invalid (e.g. it hasn't been
2517 // initialized) then just set the need_init flag.
2518 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2519 error = journal_is_clean(ji->jvp, 0, ji->jsize,
2520 (void *)1, ji->blksize);
2521 if (error == EBUSY) {
2522 struct vnop_close_args cargs = {
2523 .a_vp = ji->jvp,
2524 .a_fflag = FREAD | FWRITE,
2525 .a_context = vfs_context_kernel()
2526 };
2527 spec_close(&cargs);
2528 vnode_put(ji->jvp);
2529 ji->jvp = NULL;
2530 return 1; // keep iterating
2531 } else if (error == EINVAL) {
2532 ji->need_init = 1;
2533 }
2534 }
2535
2536 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2537 strlcpy(ji->desired_uuid, uuid_str, 128);
2538 }
2539 vnode_setmountedon(ji->jvp);
2540 return 0; // stop iterating
2541 }
2542
2543 static vnode_t
2544 open_journal_dev(mount_t mp,
2545 const char *vol_device,
2546 int need_clean,
2547 char *uuid_str,
2548 char *machine_serial_num,
2549 off_t jsize,
2550 size_t blksize,
2551 int *need_init)
2552 {
2553 int retry_counter=0;
2554 jopen_cb_info ji;
2555
2556 ji.mp = mp;
2557 ji.jsize = jsize;
2558 ji.desired_uuid = uuid_str;
2559 ji.jvp = NULL;
2560 ji.blksize = blksize;
2561 ji.need_clean = need_clean;
2562 ji.need_init = 0;
2563
2564 // if (uuid_str[0] == '\0') {
2565 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2566 // } else {
2567 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2568 // }
2569 while (ji.jvp == NULL && retry_counter++ < 4) {
2570 if (retry_counter > 1) {
2571 if (uuid_str[0]) {
2572 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2573 } else {
2574 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2575 }
2576 delay_for_interval(10 * 1000000, NSEC_PER_USEC); // wait ten seconds and then try again
2577 }
2578
2579 hfs_iterate_media_with_content(EXTJNL_CONTENT_TYPE_UUID,
2580 journal_open_cb, &ji);
2581 }
2582
2583 if (ji.jvp == NULL) {
2584 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2585 vol_device, uuid_str, machine_serial_num);
2586 }
2587
2588 *need_init = ji.need_init;
2589
2590 return ji.jvp;
2591 }
2592
2593 void hfs_close_jvp(hfsmount_t *hfsmp)
2594 {
2595 if (!hfsmp || !hfsmp->jvp || hfsmp->jvp == hfsmp->hfs_devvp)
2596 return;
2597
2598 vnode_clearmountedon(hfsmp->jvp);
2599 struct vnop_close_args cargs = {
2600 .a_vp = hfsmp->jvp,
2601 .a_fflag = FREAD | FWRITE,
2602 .a_context = vfs_context_kernel()
2603 };
2604 spec_close(&cargs);
2605 vnode_put(hfsmp->jvp);
2606 hfsmp->jvp = NULL;
2607 }
2608
2609 int
2610 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2611 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2612 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2613 {
2614 JournalInfoBlock *jibp;
2615 struct buf *jinfo_bp, *bp;
2616 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2617 int retval, write_jibp = 0;
2618 uint32_t blksize = hfsmp->hfs_logical_block_size;
2619 struct vnode *devvp;
2620 struct hfs_mount_args *args = _args;
2621 u_int32_t jib_flags;
2622 u_int64_t jib_offset;
2623 u_int64_t jib_size;
2624 const char *dev_name;
2625
2626 devvp = hfsmp->hfs_devvp;
2627 dev_name = vnode_getname_printable(devvp);
2628
2629 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2630 arg_flags = args->journal_flags;
2631 arg_tbufsz = args->journal_tbuffer_size;
2632 }
2633
2634 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2635
2636 jinfo_bp = NULL;
2637 retval = (int)buf_meta_bread(devvp,
2638 (daddr64_t)((embeddedOffset/blksize) +
2639 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2640 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2641 if (retval) {
2642 if (jinfo_bp) {
2643 buf_brelse(jinfo_bp);
2644 }
2645 goto cleanup_dev_name;
2646 }
2647
2648 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2649 jib_flags = SWAP_BE32(jibp->flags);
2650 jib_size = SWAP_BE64(jibp->size);
2651
2652 if (jib_flags & kJIJournalInFSMask) {
2653 hfsmp->jvp = hfsmp->hfs_devvp;
2654 jib_offset = SWAP_BE64(jibp->offset);
2655 } else {
2656 int need_init=0;
2657
2658 // if the volume was unmounted cleanly then we'll pick any
2659 // available external journal partition
2660 //
2661 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2662 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2663 }
2664
2665 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
2666 dev_name,
2667 !(jib_flags & kJIJournalNeedInitMask),
2668 (char *)&jibp->ext_jnl_uuid[0],
2669 (char *)&jibp->machine_serial_num[0],
2670 jib_size,
2671 hfsmp->hfs_logical_block_size,
2672 &need_init);
2673 if (hfsmp->jvp == NULL) {
2674 buf_brelse(jinfo_bp);
2675 retval = EROFS;
2676 goto cleanup_dev_name;
2677 } else {
2678 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2679 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2680 }
2681 }
2682
2683 jib_offset = 0;
2684 write_jibp = 1;
2685 if (need_init) {
2686 jib_flags |= kJIJournalNeedInitMask;
2687 }
2688 }
2689
2690 // save this off for the hack-y check in hfs_remove()
2691 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2692 hfsmp->jnl_size = jib_size;
2693
2694 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2695 // if the file system is read-only, check if the journal is empty.
2696 // if it is, then we can allow the mount. otherwise we have to
2697 // return failure.
2698 retval = journal_is_clean(hfsmp->jvp,
2699 jib_offset + embeddedOffset,
2700 jib_size,
2701 devvp,
2702 hfsmp->hfs_logical_block_size);
2703
2704 hfsmp->jnl = NULL;
2705
2706 buf_brelse(jinfo_bp);
2707
2708 if (retval) {
2709 const char *name = vnode_getname_printable(devvp);
2710 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2711 name);
2712 vnode_putname_printable(name);
2713 }
2714
2715 goto cleanup_dev_name;
2716 }
2717
2718 if (jib_flags & kJIJournalNeedInitMask) {
2719 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2720 jib_offset + embeddedOffset, jib_size);
2721 hfsmp->jnl = journal_create(hfsmp->jvp,
2722 jib_offset + embeddedOffset,
2723 jib_size,
2724 devvp,
2725 blksize,
2726 arg_flags,
2727 arg_tbufsz,
2728 hfs_sync_metadata, hfsmp->hfs_mp,
2729 hfsmp->hfs_mp);
2730 if (hfsmp->jnl)
2731 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2732
2733 // no need to start a transaction here... if this were to fail
2734 // we'd just re-init it on the next mount.
2735 jib_flags &= ~kJIJournalNeedInitMask;
2736 jibp->flags = SWAP_BE32(jib_flags);
2737 buf_bwrite(jinfo_bp);
2738 jinfo_bp = NULL;
2739 jibp = NULL;
2740 } else {
2741 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2742 // jib_offset + embeddedOffset,
2743 // jib_size, SWAP_BE32(vhp->blockSize));
2744
2745 hfsmp->jnl = journal_open(hfsmp->jvp,
2746 jib_offset + embeddedOffset,
2747 jib_size,
2748 devvp,
2749 blksize,
2750 arg_flags,
2751 arg_tbufsz,
2752 hfs_sync_metadata, hfsmp->hfs_mp,
2753 hfsmp->hfs_mp);
2754 if (hfsmp->jnl)
2755 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2756
2757 if (write_jibp) {
2758 buf_bwrite(jinfo_bp);
2759 } else {
2760 buf_brelse(jinfo_bp);
2761 }
2762 jinfo_bp = NULL;
2763 jibp = NULL;
2764
2765 if (hfsmp->jnl && mdbp) {
2766 // reload the mdb because it could have changed
2767 // if the journal had to be replayed.
2768 if (mdb_offset == 0) {
2769 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2770 }
2771 bp = NULL;
2772 retval = (int)buf_meta_bread(devvp,
2773 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2774 hfsmp->hfs_physical_block_size, cred, &bp);
2775 if (retval) {
2776 if (bp) {
2777 buf_brelse(bp);
2778 }
2779 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2780 retval);
2781 goto cleanup_dev_name;
2782 }
2783 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2784 buf_brelse(bp);
2785 bp = NULL;
2786 }
2787 }
2788
2789 // if we expected the journal to be there and we couldn't
2790 // create it or open it then we have to bail out.
2791 if (hfsmp->jnl == NULL) {
2792 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2793 retval = EINVAL;
2794 goto cleanup_dev_name;
2795 }
2796
2797 retval = 0;
2798
2799 cleanup_dev_name:
2800 vnode_putname_printable(dev_name);
2801 return retval;
2802 }
2803
2804
2805 //
2806 // This function will go and re-locate the .journal_info_block and
2807 // the .journal files in case they moved (which can happen if you
2808 // run Norton SpeedDisk). If we fail to find either file we just
2809 // disable journaling for this volume and return. We turn off the
2810 // journaling bit in the vcb and assume it will get written to disk
2811 // later (if it doesn't on the next mount we'd do the same thing
2812 // again which is harmless). If we disable journaling we don't
2813 // return an error so that the volume is still mountable.
2814 //
2815 // If the info we find for the .journal_info_block and .journal files
2816 // isn't what we had stored, we re-set our cached info and proceed
2817 // with opening the journal normally.
2818 //
2819 static int
2820 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2821 {
2822 JournalInfoBlock *jibp;
2823 struct buf *jinfo_bp;
2824 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2825 int retval, write_jibp = 0, recreate_journal = 0;
2826 struct vnode *devvp;
2827 struct cat_attr jib_attr, jattr;
2828 struct cat_fork jib_fork, jfork;
2829 ExtendedVCB *vcb;
2830 u_int32_t fid;
2831 struct hfs_mount_args *args = _args;
2832 u_int32_t jib_flags;
2833 u_int64_t jib_offset;
2834 u_int64_t jib_size;
2835
2836 devvp = hfsmp->hfs_devvp;
2837 vcb = HFSTOVCB(hfsmp);
2838
2839 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2840 if (args->journal_disable) {
2841 return 0;
2842 }
2843
2844 arg_flags = args->journal_flags;
2845 arg_tbufsz = args->journal_tbuffer_size;
2846 }
2847
2848 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2849 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2850 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2851 fid ? jib_fork.cf_extents[0].startBlock : 0);
2852 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2853 return 0;
2854 }
2855 hfsmp->hfs_jnlinfoblkid = fid;
2856
2857 // make sure the journal_info_block begins where we think it should.
2858 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2859 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2860 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2861
2862 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2863 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2864 recreate_journal = 1;
2865 }
2866
2867
2868 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2869 jinfo_bp = NULL;
2870 retval = (int)buf_meta_bread(devvp,
2871 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2872 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2873 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2874 if (retval) {
2875 if (jinfo_bp) {
2876 buf_brelse(jinfo_bp);
2877 }
2878 printf("hfs: can't read journal info block. disabling journaling.\n");
2879 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2880 return 0;
2881 }
2882
2883 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2884 jib_flags = SWAP_BE32(jibp->flags);
2885 jib_offset = SWAP_BE64(jibp->offset);
2886 jib_size = SWAP_BE64(jibp->size);
2887
2888 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2889 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2890 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2891 fid ? jfork.cf_extents[0].startBlock : 0);
2892 buf_brelse(jinfo_bp);
2893 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2894 return 0;
2895 }
2896 hfsmp->hfs_jnlfileid = fid;
2897
2898 // make sure the journal file begins where we think it should.
2899 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2900 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2901 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2902
2903 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2904 write_jibp = 1;
2905 recreate_journal = 1;
2906 }
2907
2908 // check the size of the journal file.
2909 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2910 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2911 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2912
2913 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2914 write_jibp = 1;
2915 recreate_journal = 1;
2916 }
2917
2918 if (jib_flags & kJIJournalInFSMask) {
2919 hfsmp->jvp = hfsmp->hfs_devvp;
2920 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2921 } else {
2922 const char *dev_name;
2923 int need_init = 0;
2924
2925 dev_name = vnode_getname_printable(devvp);
2926
2927 // since the journal is empty, just use any available external journal
2928 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2929
2930 // this fills in the uuid of the device we actually get
2931 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
2932 dev_name,
2933 !(jib_flags & kJIJournalNeedInitMask),
2934 (char *)&jibp->ext_jnl_uuid[0],
2935 (char *)&jibp->machine_serial_num[0],
2936 jib_size,
2937 hfsmp->hfs_logical_block_size,
2938 &need_init);
2939 if (hfsmp->jvp == NULL) {
2940 buf_brelse(jinfo_bp);
2941 vnode_putname_printable(dev_name);
2942 return EROFS;
2943 } else {
2944 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2945 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2946 }
2947 }
2948 jib_offset = 0;
2949 recreate_journal = 1;
2950 write_jibp = 1;
2951 if (need_init) {
2952 jib_flags |= kJIJournalNeedInitMask;
2953 }
2954 vnode_putname_printable(dev_name);
2955 }
2956
2957 // save this off for the hack-y check in hfs_remove()
2958 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2959 hfsmp->jnl_size = jib_size;
2960
2961 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2962 // if the file system is read-only, check if the journal is empty.
2963 // if it is, then we can allow the mount. otherwise we have to
2964 // return failure.
2965 retval = journal_is_clean(hfsmp->jvp,
2966 jib_offset,
2967 jib_size,
2968 devvp,
2969 hfsmp->hfs_logical_block_size);
2970
2971 hfsmp->jnl = NULL;
2972
2973 buf_brelse(jinfo_bp);
2974
2975 if (retval) {
2976 const char *name = vnode_getname_printable(devvp);
2977 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2978 name);
2979 vnode_putname_printable(name);
2980 }
2981
2982 return retval;
2983 }
2984
2985 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2986 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2987 jib_offset, jib_size);
2988 hfsmp->jnl = journal_create(hfsmp->jvp,
2989 jib_offset,
2990 jib_size,
2991 devvp,
2992 hfsmp->hfs_logical_block_size,
2993 arg_flags,
2994 arg_tbufsz,
2995 hfs_sync_metadata, hfsmp->hfs_mp,
2996 hfsmp->hfs_mp);
2997 if (hfsmp->jnl)
2998 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2999
3000 // no need to start a transaction here... if this were to fail
3001 // we'd just re-init it on the next mount.
3002 jib_flags &= ~kJIJournalNeedInitMask;
3003 write_jibp = 1;
3004
3005 } else {
3006 //
3007 // if we weren't the last person to mount this volume
3008 // then we need to throw away the journal because it
3009 // is likely that someone else mucked with the disk.
3010 // if the journal is empty this is no big deal. if the
3011 // disk is dirty this prevents us from replaying the
3012 // journal over top of changes that someone else made.
3013 //
3014 arg_flags |= JOURNAL_RESET;
3015
3016 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
3017 // jib_offset,
3018 // jib_size, SWAP_BE32(vhp->blockSize));
3019
3020 hfsmp->jnl = journal_open(hfsmp->jvp,
3021 jib_offset,
3022 jib_size,
3023 devvp,
3024 hfsmp->hfs_logical_block_size,
3025 arg_flags,
3026 arg_tbufsz,
3027 hfs_sync_metadata, hfsmp->hfs_mp,
3028 hfsmp->hfs_mp);
3029 if (hfsmp->jnl)
3030 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3031 }
3032
3033
3034 if (write_jibp) {
3035 jibp->flags = SWAP_BE32(jib_flags);
3036 jibp->offset = SWAP_BE64(jib_offset);
3037 jibp->size = SWAP_BE64(jib_size);
3038
3039 buf_bwrite(jinfo_bp);
3040 } else {
3041 buf_brelse(jinfo_bp);
3042 }
3043 jinfo_bp = NULL;
3044 jibp = NULL;
3045
3046 // if we expected the journal to be there and we couldn't
3047 // create it or open it then we have to bail out.
3048 if (hfsmp->jnl == NULL) {
3049 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
3050 return EINVAL;
3051 }
3052
3053 return 0;
3054 }
3055
3056 /*
3057 * Calculate the allocation zone for metadata.
3058 *
3059 * This zone includes the following:
3060 * Allocation Bitmap file
3061 * Overflow Extents file
3062 * Journal file
3063 * Quota files
3064 * Clustered Hot files
3065 * Catalog file
3066 *
3067 * METADATA ALLOCATION ZONE
3068 * ____________________________________________________________________________
3069 * | | | | | | |
3070 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
3071 * |____|____|_____|_______________|______________________________|___________|
3072 *
3073 * <------------------------------- N * 128 MB ------------------------------->
3074 *
3075 */
3076 #define GIGABYTE (u_int64_t)(1024*1024*1024)
3077
3078 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
3079 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
3080
3081 /* Initialize the metadata zone.
3082 *
3083 * If the size of the volume is less than the minimum size for
3084 * the metadata zone, the metadata zone is disabled.
3085 *
3086 * If disable is true, the metadata zone is disabled unconditionally.
3087 */
3088 void
3089 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
3090 {
3091 ExtendedVCB *vcb;
3092 u_int64_t fs_size;
3093 u_int64_t zonesize;
3094 u_int64_t temp;
3095 u_int64_t filesize;
3096 u_int32_t blk;
3097 int items, really_do_it=1;
3098
3099 vcb = HFSTOVCB(hfsmp);
3100 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3101
3102 /*
3103 * For volumes less than 10 GB, don't bother.
3104 */
3105 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3106 really_do_it = 0;
3107 }
3108
3109 /*
3110 * Skip non-journaled volumes as well.
3111 */
3112 if (hfsmp->jnl == NULL) {
3113 really_do_it = 0;
3114 }
3115
3116 /* If caller wants to disable metadata zone, do it */
3117 if (disable == true) {
3118 really_do_it = 0;
3119 }
3120
3121 /*
3122 * Start with space for the boot blocks and Volume Header.
3123 * 1536 = byte offset from start of volume to end of volume header:
3124 * 1024 bytes is the offset from the start of the volume to the
3125 * start of the volume header (defined by the volume format)
3126 * + 512 bytes (the size of the volume header).
3127 */
3128 zonesize = roundup(1536, hfsmp->blockSize);
3129
3130 /*
3131 * Add the on-disk size of allocation bitmap.
3132 */
3133 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3134
3135 /*
3136 * Add space for the Journal Info Block and Journal (if they're in
3137 * this file system).
3138 */
3139 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3140 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3141 }
3142
3143 /*
3144 * Add the existing size of the Extents Overflow B-tree.
3145 * (It rarely grows, so don't bother reserving additional room for it.)
3146 */
3147 zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
3148
3149 /*
3150 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3151 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3152 * When installing a full OS install onto a 20GB volume, we use
3153 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3154 * us with another 3 or 4 clumps worth before we need another extent.
3155 */
3156 if (hfsmp->hfs_attribute_cp) {
3157 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3158 }
3159
3160 /*
3161 * Leave room for 11 clumps of the Catalog B-tree.
3162 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3163 * When installing a full OS install onto a 20GB volume, we use
3164 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3165 * us with another 3 or 4 clumps worth before we need another extent.
3166 */
3167 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3168
3169 /*
3170 * Add space for hot file region.
3171 *
3172 * ...for now, use 5 MB per 1 GB (0.5 %)
3173 */
3174 filesize = (fs_size / 1024) * 5;
3175 if (filesize > HOTBAND_MAXIMUM_SIZE)
3176 filesize = HOTBAND_MAXIMUM_SIZE;
3177 else if (filesize < HOTBAND_MINIMUM_SIZE)
3178 filesize = HOTBAND_MINIMUM_SIZE;
3179 /*
3180 * Calculate user quota file requirements.
3181 */
3182 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3183 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3184 if (items < QF_MIN_USERS)
3185 items = QF_MIN_USERS;
3186 else if (items > QF_MAX_USERS)
3187 items = QF_MAX_USERS;
3188 if (!powerof2(items)) {
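/* Round items up to the next power of two. */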
3189 int x = items;
3190 items = 4;
3191 while (x>>1 != 1) {
3192 x = x >> 1;
3193 items = items << 1;
3194 }
3195 }
3196 filesize += (items + 1) * sizeof(struct dqblk);
3197 /*
3198 * Calculate group quota file requirements.
3199 *
3200 */
3201 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3202 if (items < QF_MIN_GROUPS)
3203 items = QF_MIN_GROUPS;
3204 else if (items > QF_MAX_GROUPS)
3205 items = QF_MAX_GROUPS;
3206 if (!powerof2(items)) {
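/* Round items up to the next power of two. */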
3207 int x = items;
3208 items = 4;
3209 while (x>>1 != 1) {
3210 x = x >> 1;
3211 items = items << 1;
3212 }
3213 }
3214 filesize += (items + 1) * sizeof(struct dqblk);
3215 }
3216 zonesize += filesize;
3217
3218 /*
3219 * Round up entire zone to a bitmap block's worth.
3220 * The extra space goes to the catalog file and hot file area.
3221 */
3222 temp = zonesize;
3223 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3224 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3225 /*
3226 * If doing the round up for hfs_min_alloc_start would push us past
3227 * allocLimit, then just reset it back to 0. Though using a value
3228 * bigger than allocLimit would not cause damage in the block allocator
3229 * code, this value could get stored in the volume header and make it out
3230 * to disk, making the volume header technically corrupt.
3231 */
3232 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3233 hfsmp->hfs_min_alloc_start = 0;
3234 }
3235
3236 if (really_do_it == 0) {
3237 /* If metadata zone needs to be disabled because the
3238 * volume was truncated, clear the bit and zero out
3239 * the values that are no longer needed.
3240 */
3241 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3242 /* Disable metadata zone */
3243 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3244
3245 /* Zero out mount point values that are not required */
3246 hfsmp->hfs_catalog_maxblks = 0;
3247 hfsmp->hfs_hotfile_maxblks = 0;
3248 hfsmp->hfs_hotfile_start = 0;
3249 hfsmp->hfs_hotfile_end = 0;
3250 hfsmp->hfs_hotfile_freeblks = 0;
3251 hfsmp->hfs_metazone_start = 0;
3252 hfsmp->hfs_metazone_end = 0;
3253 }
3254
3255 return;
3256 }
3257
3258 temp = zonesize - temp; /* temp has extra space */
3259 filesize += temp / 3;
3260 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3261
3262 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3263 hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
3264 } else {
3265 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3266 }
3267
3268 /* Convert to allocation blocks. */
3269 blk = zonesize / vcb->blockSize;
3270
3271 /* The default metadata zone location is at the start of volume. */
3272 hfsmp->hfs_metazone_start = 1;
3273 hfsmp->hfs_metazone_end = blk - 1;
3274
3275 /* The default hotfile area is at the end of the zone. */
3276 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3277 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3278 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3279 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3280 }
3281 else {
3282 hfsmp->hfs_hotfile_start = 0;
3283 hfsmp->hfs_hotfile_end = 0;
3284 hfsmp->hfs_hotfile_freeblks = 0;
3285 }
3286 #if DEBUG
3287 printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3288 printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3289 printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
3290 #endif
3291
3292 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3293 }
3294
3295
3296 static u_int32_t
3297 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3298 {
3299 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3300 int lockflags;
3301 int freeblocks;
3302
3303 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3304 //
3305 // This is only used at initialization time; on an SSD
3306 // we'll get the real info from the hotfile btree's
3307 // user info.
3308 //
3309 return 0;
3310 }
3311
3312 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3313 freeblocks = MetaZoneFreeBlocks(vcb);
3314 hfs_systemfile_unlock(hfsmp, lockflags);
3315
3316 /* Minus Extents overflow file reserve. */
3317 if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
3318 freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3319 }
3320
3321 /* Minus catalog file reserve. */
3322 if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
3323 freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3324 }
3325
3326 if (freeblocks < 0)
3327 freeblocks = 0;
3328
3329 // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
3330 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3331 }
3332
3333 /*
3334 * Determine if a file is a "virtual" metadata file.
3335 * This includes journal and quota files.
3336 */
3337 int
3338 hfs_virtualmetafile(struct cnode *cp)
3339 {
3340 const char * filename;
3341
3342
3343 if (cp->c_parentcnid != kHFSRootFolderID)
3344 return (0);
3345
3346 filename = (const char *)cp->c_desc.cd_nameptr;
3347 if (filename == NULL)
3348 return (0);
3349
3350 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3351 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3352 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3353 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3354 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3355 return (1);
3356
3357 return (0);
3358 }
3359
3360 void hfs_syncer_lock(struct hfsmount *hfsmp)
3361 {
3362 hfs_lock_mount(hfsmp);
3363 }
3364
3365 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3366 {
3367 hfs_unlock_mount(hfsmp);
3368 }
3369
3370 void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts)
3371 {
3372 msleep(&hfsmp->hfs_syncer_thread, &hfsmp->hfs_mutex, PWAIT,
3373 "hfs_syncer_wait", ts);
3374 }
3375
3376 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3377 {
3378 wakeup(&hfsmp->hfs_syncer_thread);
3379 }
3380
3381 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3382 {
3383 uint64_t deadline;
3384 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3385 return deadline;
3386 }
3387
3388 //
3389 // Fire off a timed callback to sync the disk if the
3390 // volume is on ejectable media.
3391 //
3392 void hfs_sync_ejectable(struct hfsmount *hfsmp)
3393 {
3394 // If we don't have a syncer or we get called by the syncer, just return
3395 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3396 || current_thread() == hfsmp->hfs_syncer_thread) {
3397 return;
3398 }
3399
3400 hfs_syncer_lock(hfsmp);
3401
3402 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3403 microuptime(&hfsmp->hfs_sync_req_oldest);
3404
3405 /* If hfs_unmount is running, it will clear the HFS_RUN_SYNCER
3406 flag. Also, we don't want to queue again if there is a sync
3407 outstanding. */
3408 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3409 || hfsmp->hfs_syncer_thread) {
3410 hfs_syncer_unlock(hfsmp);
3411 return;
3412 }
3413
3414 hfsmp->hfs_syncer_thread = (void *)1;	// dummy non-NULL value marks a sync as queued until the thread starts
3415
3416 hfs_syncer_unlock(hfsmp);
3417
3418 kernel_thread_start(hfs_syncer, hfsmp, &hfsmp->hfs_syncer_thread);
3419 thread_deallocate(hfsmp->hfs_syncer_thread);
3420 }
3421
3422 int
3423 hfs_start_transaction(struct hfsmount *hfsmp)
3424 {
3425 int ret = 0, unlock_on_err = 0;
3426 thread_t thread = current_thread();
3427
3428 #ifdef HFS_CHECK_LOCK_ORDER
3429 /*
3430 * You cannot start a transaction while holding a system
3431 * file lock. (unless the transaction is nested.)
3432 */
3433 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3434 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3435 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3436 }
3437 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3438 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3439 }
3440 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3441 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3442 }
3443 }
3444 #endif /* HFS_CHECK_LOCK_ORDER */
3445
3446 again:
3447
3448 if (hfsmp->jnl) {
3449 if (journal_owner(hfsmp->jnl) != thread) {
3450 /*
3451 * The global lock should be held shared while the journal is
3452 * active, to prevent it from being disabled. If we're not the owner
3453 * of the journal lock, verify that we're not already
3454 * holding the global lock exclusive before moving on.
3455 */
3456 if (hfsmp->hfs_global_lockowner == thread) {
3457 ret = EBUSY;
3458 goto out;
3459 }
3460
3461 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3462
3463 // Things could have changed
3464 if (!hfsmp->jnl) {
3465 hfs_unlock_global(hfsmp);
3466 goto again;
3467 }
3468
3469 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3470 unlock_on_err = 1;
3471 }
3472 } else {
3473 // No journal
3474 if (hfsmp->hfs_global_lockowner != thread) {
3475 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3476
3477 // Things could have changed
3478 if (hfsmp->jnl) {
3479 hfs_unlock_global(hfsmp);
3480 goto again;
3481 }
3482
3483 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3484 unlock_on_err = 1;
3485 }
3486 }
3487
3488 /* If a downgrade to a read-only mount is in progress, no thread
3489 * other than the downgrading thread is allowed to modify
3490 * the file system.
3491 */
3492 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3493 hfsmp->hfs_downgrading_thread != thread) {
3494 ret = EROFS;
3495 goto out;
3496 }
3497
3498 if (hfsmp->jnl) {
3499 ret = journal_start_transaction(hfsmp->jnl);
3500 } else {
3501 ret = 0;
3502 }
3503
3504 if (ret == 0)
3505 ++hfsmp->hfs_transaction_nesting;
3506
3507 out:
3508 if (ret != 0 && unlock_on_err) {
3509 hfs_unlock_global (hfsmp);
3510 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3511 }
3512
3513 return ret;
3514 }
3515
3516 int
3517 hfs_end_transaction(struct hfsmount *hfsmp)
3518 {
3519 int ret;
3520
3521 hfs_assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
3522 hfs_assert(hfsmp->hfs_transaction_nesting > 0);
3523
3524 if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
3525 hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
3526
3527 bool need_unlock = !--hfsmp->hfs_transaction_nesting;
3528
3529 if (hfsmp->jnl) {
3530 ret = journal_end_transaction(hfsmp->jnl);
3531 } else {
3532 ret = 0;
3533 }
3534
3535 if (need_unlock) {
3536 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3537 hfs_unlock_global (hfsmp);
3538 hfs_sync_ejectable(hfsmp);
3539 }
3540
3541 return ret;
3542 }
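
//
// Illustrative sketch (not part of the original source): the canonical
// caller pattern for the two routines above, as hfs_erase_unused_nodes()
// below also demonstrates -- start a transaction, take the system file
// lock, do the metadata work, then release in the reverse order.
//
#if 0
int error, lockflags;
if ((error = hfs_start_transaction(hfsmp)) != 0)
	return error;
lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
/* ... modify catalog metadata here ... */
hfs_systemfile_unlock(hfsmp, lockflags);
hfs_end_transaction(hfsmp);
#endif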
3543
3544
3545 void
3546 hfs_journal_lock(struct hfsmount *hfsmp)
3547 {
3548 /* Only peek at hfsmp->jnl while holding the global lock */
3549 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3550 if (hfsmp->jnl) {
3551 journal_lock(hfsmp->jnl);
3552 }
3553 hfs_unlock_global (hfsmp);
3554 }
3555
3556 void
3557 hfs_journal_unlock(struct hfsmount *hfsmp)
3558 {
3559 /* Only peek at hfsmp->jnl while holding the global lock */
3560 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3561 if (hfsmp->jnl) {
3562 journal_unlock(hfsmp->jnl);
3563 }
3564 hfs_unlock_global (hfsmp);
3565 }
3566
3567 /*
3568 * Flush the contents of the journal to the disk.
3569 *
3570 * - HFS_FLUSH_JOURNAL
3571 * Wait for the in-memory journal to be written to the disk
3572 * consistently. The journal may still contain uncommitted
3573 * transactions, and the file system metadata blocks in
3574 * those transactions may be written asynchronously to
3575 * the disk, with no guarantee that they reach the disk
3576 * before this call returns to the caller.
3577 * Note that this option is sufficient for file system
3578 * data integrity, as it guarantees consistent journal
3579 * content on the disk.
3580 *
3581 * - HFS_FLUSH_JOURNAL_META
3582 * Wait for the in-memory journal to be written to the disk
3583 * consistently, and also wait for all asynchronous
3584 * metadata blocks to be written to their corresponding
3585 * locations on the disk. This is overkill in normal
3586 * scenarios but is useful whenever the metadata blocks
3587 * are required to be consistent on disk instead of
3588 * just the journal being consistent, e.g. before live
3589 * verification and live volume resizing. The metadata
3590 * updates do not include a barrier or track cache flush.
3591 *
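* - HFS_FLUSH_JOURNAL_BARRIER
* HFS_FLUSH_JOURNAL + issue a write barrier afterwards
* (downgraded to HFS_FLUSH_FULL on media without barrier
* support, as the code below shows).
*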
3592 * - HFS_FLUSH_FULL
3593 * HFS_FLUSH_JOURNAL + force a track cache flush to media
3594 *
3595 * - HFS_FLUSH_CACHE
3596 * Force a track cache flush to media.
3597 *
3598 * - HFS_FLUSH_BARRIER
3599 * Barrier-only flush to ensure write order
3600 *
3601 */
3602 errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
3603 {
3604 errno_t error = 0;
3605 int options = 0;
3606 dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
3607
3608 switch (mode) {
3609 case HFS_FLUSH_JOURNAL_META:
3610 // wait for journal, metadata blocks and previous async flush to finish
3611 SET(options, JOURNAL_WAIT_FOR_IO);
3612
3613 // no break
3614
3615 case HFS_FLUSH_JOURNAL:
3616 case HFS_FLUSH_JOURNAL_BARRIER:
3617 case HFS_FLUSH_FULL:
3618
3619 if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
3620 !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3621 mode = HFS_FLUSH_FULL;
3622
3623 if (mode == HFS_FLUSH_FULL)
3624 SET(options, JOURNAL_FLUSH_FULL);
3625
3626 /* Only peek at hfsmp->jnl while holding the global lock */
3627 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3628
3629 if (hfsmp->jnl)
3630 error = journal_flush(hfsmp->jnl, options);
3631
3632 hfs_unlock_global (hfsmp);
3633
3634 /*
3635 * This may result in a double barrier as
3636 * journal_flush may have issued a barrier itself
3637 */
3638 if (mode == HFS_FLUSH_JOURNAL_BARRIER)
3639 error = VNOP_IOCTL(hfsmp->hfs_devvp,
3640 DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3641 FWRITE, NULL);
3642
3643 break;
3644
3645 case HFS_FLUSH_CACHE:
3646 // Do a full sync
3647 sync_req.options = 0;
3648
3649 // no break
3650
3651 case HFS_FLUSH_BARRIER:
3652 // If a barrier-only flush isn't supported by the device, fall back to a full flush.
3653 if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3654 sync_req.options = 0;
3655
3656 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3657 FWRITE, NULL);
3658 break;
3659
3660 default:
3661 error = EINVAL;
3662 }
3663
3664 return error;
3665 }
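
//
// Illustrative sketch (not part of the original source): choosing a
// flush mode with the routine above. A caller that only needs the
// on-disk journal to be consistent uses HFS_FLUSH_JOURNAL; one that
// also needs the metadata blocks themselves on disk (e.g. before live
// verification) uses HFS_FLUSH_JOURNAL_META.
//
#if 0
errno_t err;

// Cheap: guarantee on-disk journal consistency only.
err = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);

// Stronger: also wait for the asynchronous metadata block writes.
if (err == 0)
	err = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
#endif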
3666
3667 /*
3668 * hfs_erase_unused_nodes
3669 *
3670 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3671 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3672 * zeroes to the unused nodes.
3673 *
3674 * How do we detect when a volume needs this repair? We can't always be
3675 * certain. If a volume was created after a certain date, then it may have
3676 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3677 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3678 * that means that the entire first clump must have been written to, which means
3679 * there shouldn't be unused and unwritten nodes in that first clump, and this
3680 * repair is not needed.
3681 *
3682 * We have defined a bit in the Volume Header's attributes to indicate when the
3683 * unused nodes have been repaired. A newer newfs_hfs will set this bit,
3684 * as will fsck_hfs when it repairs the unused nodes.
3685 */
3686 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3687 {
3688 int result;
3689 struct filefork *catalog;
3690 int lockflags;
3691
3692 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3693 {
3694 /* This volume has already been checked and repaired. */
3695 return 0;
3696 }
3697
3698 if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3699 {
3700 /* This volume is too old to have had the problem. */
3701 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3702 return 0;
3703 }
3704
3705 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3706 if (catalog->ff_size > catalog->ff_clumpsize)
3707 {
3708 /* The entire first clump must have been in use at some point. */
3709 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3710 return 0;
3711 }
3712
3713 /*
3714 * If we get here, we need to zero out those unused nodes.
3715 *
3716 * We start a transaction and lock the catalog since we're going to be
3717 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3718 * do its writing via the journal, because that would be too much I/O
3719 * to fit in a transaction, and it's a pain to break it up into multiple
3720 * transactions. (It behaves more like growing a B-tree would.)
3721 */
3722 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3723 result = hfs_start_transaction(hfsmp);
3724 if (result)
3725 goto done;
3726 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3727 result = BTZeroUnusedNodes(catalog);
3728 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3729 hfs_systemfile_unlock(hfsmp, lockflags);
3730 hfs_end_transaction(hfsmp);
3731 if (result == 0)
3732 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3733 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3734
3735 done:
3736 return result;
3737 }
3738
3739
3740 int
3741 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3742 {
3743 int error;
3744
3745 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || decmpfs_cnode_cmp_type(VTOCMP(vp)) != DATALESS_CMPFS_TYPE) {
3746 // there's nothing to do, it's not dataless
3747 return 0;
3748 }
3749
3750 /* Swap files are special; ignore them */
3751 if (vnode_isswap(vp)) {
3752 return 0;
3753 }
3754
3755 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3756 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3757 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3758 error = 0;
3759 } else if (error) {
3760 if (error == EAGAIN) {
3761 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3762 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3763 return 0;
3764 } else if (error == EINTR) {
3765 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3766 return EINTR;
3767 }
3768 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3769 //
3770 // if we're here, the dataless bit is still set on the file
3771 // which means it didn't get handled. we return an error
3772 // but it's presently ignored by all callers of this function.
3773 //
3774 // XXXdbg - EDATANOTPRESENT is what we really need...
3775 //
3776 return EBADF;
3777 }
3778
3779 return error;
3780 }
3781
3782
3783 //
3784 // NOTE: this function takes care of starting a transaction and
3785 // acquiring the systemfile lock so that it can call
3786 // cat_update().
3787 //
3788 // NOTE: do NOT hold any cnode locks while calling this function
3789 // to avoid deadlocks (because we take a lock on the root
3790 // cnode)
3791 //
3792 int
3793 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3794 {
3795 struct vnode *rvp;
3796 struct cnode *cp;
3797 int error;
3798
3799 error = hfs_vfs_root(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3800 if (error) {
3801 return error;
3802 }
3803
3804 cp = VTOC(rvp);
3805 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3806 vnode_put(rvp); return error;	// drop the root vnode iocount on error
3807 }
3808 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3809
3810 int lockflags;
3811 if ((error = hfs_start_transaction(hfsmp)) != 0) {
3812 hfs_unlock(cp); vnode_put(rvp); return error;	// undo the lock and iocount on error
3813 }
3814 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3815
3816 if (extinfo->document_id == 0) {
3817 // initialize this to start at 3 (one greater than the root-dir id)
3818 extinfo->document_id = 3;
3819 }
3820
3821 *docid = extinfo->document_id++;
3822
3823 // mark the root cnode dirty
3824 cp->c_flag |= C_MODIFIED;
3825 hfs_update(cp->c_vp, 0);
3826
3827 hfs_systemfile_unlock (hfsmp, lockflags);
3828 (void) hfs_end_transaction(hfsmp);
3829
3830 (void) hfs_unlock(cp);
3831
3832 vnode_put(rvp);
3833 rvp = NULL;
3834
3835 return 0;
3836 }
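
//
// Illustrative sketch (not part of the original source): per the notes
// above, call with no cnode locks held; the routine takes the root
// cnode lock itself.
//
#if 0
uint32_t docid;
int err = hfs_generate_document_id(hfsmp, &docid);
if (err == 0)
	printf("hfs: new document id %u\n", docid);
#endif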
3837
3838
3839 /*
3840 * Return information about number of file system allocation blocks
3841 * taken by metadata on a volume.
3842 *
3843 * This function populates struct hfsinfo_metadata with allocation blocks
3844 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3845 * journal file, and sum of all of the above.
3846 */
3847 int
3848 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3849 {
3850 int lockflags = 0;
3851 int ret_lockflags = 0;
3852
3853 /* Zero out the output buffer */
3854 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3855
3856 /*
3857 * Getting number of allocation blocks for all btrees
3858 * should be a quick operation, so we grab locks for
3859 * all of them at the same time
3860 */
3861 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3862 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3863 /*
3864 * Make sure that we were able to acquire all locks requested
3865 * to protect us against conditions like unmount in progress.
3866 */
3867 if ((lockflags & ret_lockflags) != lockflags) {
3868 /* Release any locks that were acquired */
3869 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3870 return EPERM;
3871 }
3872
3873 /* Get information about all the btrees */
3874 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3875 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3876 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3877 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3878
3879 /* Done with btrees, give up the locks */
3880 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3881
3882 /* Get information about journal file */
3883 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3884
3885 /* Calculate total number of metadata blocks */
3886 hinfo->total = hinfo->extents + hinfo->catalog +
3887 hinfo->allocation + hinfo->attribute +
3888 hinfo->journal;
3889
3890 return 0;
3891 }
3892
3893 static int
3894 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3895 {
3896 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3897
3898 return 0;
3899 }
3900
3901 int hfs_freeze(struct hfsmount *hfsmp)
3902 {
3903 // First make sure some other process isn't freezing
3904 hfs_lock_mount(hfsmp);
3905 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3906 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3907 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3908 hfs_unlock_mount(hfsmp);
3909 return EINTR;
3910 }
3911 }
3912
3913 // Stop new syncers from starting
3914 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3915
3916 // Now wait for all syncers to finish
3917 while (hfsmp->hfs_syncers) {
3918 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3919 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
3920 hfs_thaw_locked(hfsmp);
3921 hfs_unlock_mount(hfsmp);
3922 return EINTR;
3923 }
3924 }
3925 hfs_unlock_mount(hfsmp);
3926
3927 // flush things before we get started to try and prevent
3928 // dirty data from being paged out while we're frozen.
3929 // note: we can't do this once we're in the freezing state because
3930 // other threads will need to take the global lock
3931 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
3932
3933 // Block everything in hfs_lock_global now
3934 hfs_lock_mount(hfsmp);
3935 hfsmp->hfs_freeze_state = HFS_FREEZING;
3936 hfsmp->hfs_freezing_thread = current_thread();
3937 hfs_unlock_mount(hfsmp);
3938
3939 /* Take the exclusive lock to flush out anything else that
3940 might have the global lock at the moment and also so we
3941 can flush the journal. */
3942 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3943 journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
3944 hfs_unlock_global(hfsmp);
3945
3946 // don't need to iterate on all vnodes, we just need to
3947 // wait for writes to the system files and the device vnode
3948 //
3949 // Now that journal flush waits for all metadata blocks to
3950 // be written out, waiting for btree writes is probably no
3951 // longer required.
3952 if (HFSTOVCB(hfsmp)->extentsRefNum)
3953 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
3954 if (HFSTOVCB(hfsmp)->catalogRefNum)
3955 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
3956 if (HFSTOVCB(hfsmp)->allocationsRefNum)
3957 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
3958 if (hfsmp->hfs_attribute_vp)
3959 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
3960 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
3961
3962 // We're done, mark frozen
3963 hfs_lock_mount(hfsmp);
3964 hfsmp->hfs_freeze_state = HFS_FROZEN;
3965 hfsmp->hfs_freezing_proc = current_proc();
3966 hfs_unlock_mount(hfsmp);
3967
3968 return 0;
3969 }
3970
3971 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3972 {
3973 hfs_lock_mount(hfsmp);
3974
3975 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3976 hfs_unlock_mount(hfsmp);
3977 return EINVAL;
3978 }
3979 if (process && hfsmp->hfs_freezing_proc != process) {
3980 hfs_unlock_mount(hfsmp);
3981 return EPERM;
3982 }
3983
3984 hfs_thaw_locked(hfsmp);
3985
3986 hfs_unlock_mount(hfsmp);
3987
3988 return 0;
3989 }
3990
3991 static void hfs_thaw_locked(struct hfsmount *hfsmp)
3992 {
3993 hfsmp->hfs_freezing_proc = NULL;
3994 hfsmp->hfs_freeze_state = HFS_THAWED;
3995
3996 wakeup(&hfsmp->hfs_freeze_state);
3997 }
3998
3999 uintptr_t obfuscate_addr(void *addr)
4000 {
4001 vm_offset_t new_addr;
4002 vm_kernel_addrperm_external((vm_offset_t)addr, &new_addr);
4003 return new_addr;
4004 }
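
//
// Illustrative sketch (not part of the original source): typical use is
// to scrub kernel pointers before logging them.
//
#if 0
printf("hfs: dataless file (vp 0x%lx)\n", obfuscate_addr(vp));
#endif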
4005
4006 #if CONFIG_HFS_STD
4007 /*
4008 * Convert HFS encoded string into UTF-8
4009 *
4010 * Unicode output is fully decomposed
4011 * '/' chars are converted to ':'
4012 */
4013 int
4014 hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
4015 {
4016 int error;
4017 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4018 ItemCount uniCount;
4019 size_t utf8len;
4020 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
4021 u_int8_t pascal_length = 0;
4022
4023 /*
4024 * Validate the length of the Pascal-style string before passing it
4025 * down to the decoding engine.
4026 */
4027 pascal_length = *((const u_int8_t*)(hfs_str));
4028 if (pascal_length > 31) {
4029 /* invalid string; longer than 31 bytes */
4030 error = EINVAL;
4031 return error;
4032 }
4033
4034 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
4035
4036 if (uniCount == 0)
4037 error = EINVAL;
4038
4039 if (error == 0) {
4040 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
4041 if (error == ENAMETOOLONG)
4042 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
4043 else
4044 *actualDstLen = utf8len;
4045 }
4046
4047 return error;
4048 }
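
/*
 * Illustrative sketch (not part of the original source): converting a
 * length-prefixed (Pascal) HFS name to UTF-8 with the routine above.
 * The name and buffer sizes are example values only.
 */
#if 0
Str31 hfs_name = "\005File1";	// length byte (5) followed by the name
unsigned char utf8[64];
ByteCount utf8len = 0;
int err = hfs_to_utf8(vcb, hfs_name, sizeof(utf8), &utf8len, utf8);
#endif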
4049
4050 /*
4051 * Convert UTF-8 string into HFS encoding
4052 *
4053 * ':' chars are converted to '/'
4054 * Assumes input represents fully decomposed Unicode
4055 */
4056 int
4057 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
4058 {
4059 int error;
4060 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4061 size_t ucslen;
4062
4063 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
4064 if (error == 0)
4065 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
4066
4067 return error;
4068 }
4069
4070 /*
4071 * Convert Unicode string into HFS encoding
4072 *
4073 * ':' chars are converted to '/'
4074 * Assumes input represents fully decomposed Unicode
4075 */
4076 int
4077 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
4078 {
4079 int error;
4080 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
4081
4082 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
4083 if (error && retry) {
4084 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
4085 }
4086 return error;
4087 }
4088
4089 #endif // CONFIG_HFS_STD
4090
4091 static uint64_t hfs_allocated __attribute__((aligned(8)));
4092
4093 #if HFS_MALLOC_DEBUG
4094
4095 #warning HFS_MALLOC_DEBUG is on
4096
4097 #include <libkern/OSDebug.h>
4098 #include "hfs_alloc_trace.h"
4099
4100 struct alloc_debug_header {
4101 uint32_t magic;
4102 uint32_t size;
4103 uint64_t sequence;
4104 LIST_ENTRY(alloc_debug_header) chain;
4105 void *backtrace[HFS_ALLOC_BACKTRACE_LEN];
4106 };
4107
4108 enum {
4109 HFS_ALLOC_MAGIC = 0x68667361, // "hfsa"
4110 HFS_ALLOC_DEAD = 0x68667364, // "hfsd"
4111 };
4112
4113 static LIST_HEAD(, alloc_debug_header) hfs_alloc_list;
4114 static lck_mtx_t *hfs_alloc_mtx;
4115 static int hfs_alloc_tracing;
4116 static uint64_t hfs_alloc_sequence;
4117
4118 void hfs_alloc_trace_enable(void)
4119 {
4120 if (hfs_alloc_tracing)
4121 return;
4122
4123 // Not thread-safe, but this is debug so who cares
4124 extern lck_grp_t *hfs_mutex_group;
4125 extern lck_attr_t *hfs_lock_attr;
4126
4127 if (!hfs_alloc_mtx) {
4128 hfs_alloc_mtx = lck_mtx_alloc_init(hfs_mutex_group, hfs_lock_attr);
4129 LIST_INIT(&hfs_alloc_list);
4130 }
4131
4132 // Using OSCompareAndSwap in lieu of a barrier
4133 OSCompareAndSwap(hfs_alloc_tracing, true, &hfs_alloc_tracing);
4134 }
4135
4136 void hfs_alloc_trace_disable(void)
4137 {
4138 if (!hfs_alloc_tracing)
4139 return;
4140
4141 hfs_alloc_tracing = false;
4142
4143 lck_mtx_lock_spin(hfs_alloc_mtx);
4144
4145 struct alloc_debug_header *hdr;
4146 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4147 hdr->chain.le_prev = NULL;
4148 }
4149 LIST_INIT(&hfs_alloc_list);
4150
4151 lck_mtx_unlock(hfs_alloc_mtx);
4152 }
4153
4154 static int hfs_handle_alloc_tracing SYSCTL_HANDLER_ARGS
4155 {
4156 int v = hfs_alloc_tracing;
4157
4158 int err = sysctl_handle_int(oidp, &v, 0, req);
4159 if (err || req->newptr == USER_ADDR_NULL || v == hfs_alloc_tracing)
4160 return err;
4161
4162 if (v)
4163 hfs_alloc_trace_enable();
4164 else
4165 hfs_alloc_trace_disable();
4166
4167 return 0;
4168 }
4169
4170 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_tracing,
4171 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0,
4172 hfs_handle_alloc_tracing, "I", "Allocation tracing")
4173
4174 static int hfs_handle_alloc_trace_info SYSCTL_HANDLER_ARGS
4175 {
4176 if (!hfs_alloc_tracing) {
4177 struct hfs_alloc_trace_info info = {};
4178 return sysctl_handle_opaque(oidp, &info, sizeof(info), req);
4179 }
4180
4181 const int size = 128 * 1024;
4182 struct hfs_alloc_trace_info *info = kalloc(size);
4183
4184 const int max_entries = ((size - sizeof(*info))
4185 / sizeof(struct hfs_alloc_info_entry));
4186
4187 info->entry_count = 0;
4188 info->more = false;
4189
4190 lck_mtx_lock_spin(hfs_alloc_mtx);
4191
4192 struct alloc_debug_header *hdr;
4193 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4194 if (info->entry_count == max_entries) {
4195 info->more = true;
4196 break;
4197 }
4198 vm_offset_t o;
4199 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4200 info->entries[info->entry_count].ptr = o;
4201 info->entries[info->entry_count].size = hdr->size;
4202 info->entries[info->entry_count].sequence = hdr->sequence;
4203 for (int i = 0; i < HFS_ALLOC_BACKTRACE_LEN; ++i) {
4204 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[i], &o);
4205 info->entries[info->entry_count].backtrace[i] = o;
4206 }
4207 ++info->entry_count;
4208 }
4209
4210 lck_mtx_unlock(hfs_alloc_mtx);
4211
4212 int err = sysctl_handle_opaque(oidp, info,
4213 sizeof(*info) + info->entry_count
4214 * sizeof(struct hfs_alloc_info_entry),
4215 req);
4216
4217 kfree(info, size);
4218
4219 return err;
4220 }
4221
4222 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_trace_info,
4223 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0,
4224 hfs_handle_alloc_trace_info, "-", "Allocation trace info")
4225
4226 bool hfs_dump_allocations(void)
4227 {
4228 if (!hfs_allocated)
4229 return false;
4230
4231 lck_mtx_lock(hfs_alloc_mtx);
4232
4233 struct alloc_debug_header *hdr;
4234 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4235 vm_offset_t o;
4236 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4237 printf(" -- 0x%lx:%llu <%u> --\n", o, hdr->sequence, hdr->size);
4238 for (int j = 0; j < HFS_ALLOC_BACKTRACE_LEN && hdr->backtrace[j]; ++j) {
4239 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[j], &o);
4240 printf("0x%lx\n", o);
4241 }
4242 }
4243
4244 lck_mtx_unlock(hfs_alloc_mtx);
4245
4246 return true;
4247 }
4248
4249 #endif
4250
4251 HFS_SYSCTL(QUAD, _vfs_generic_hfs, OID_AUTO, allocated,
4252 CTLFLAG_RD | CTLFLAG_LOCKED, &hfs_allocated, "Memory allocated")
4253
4254 // Any allocation >= PAGE_SIZE will be page aligned
4255 void *hfs_malloc(size_t size)
4256 {
4257 #if HFS_MALLOC_DEBUG
4258 hfs_assert(size <= 0xffffffff);
4259
4260 struct alloc_debug_header *hdr;
4261
4262 void *ptr;
4263
4264 if (size >= PAGE_SIZE)
4265 ptr = IOMallocAligned(size + sizeof(*hdr), PAGE_SIZE);
4266 else
4267 ptr = kalloc(size + sizeof(*hdr));
4268
4269 hdr = ptr + size;
4270
4271 hdr->magic = HFS_ALLOC_MAGIC;
4272 hdr->size = size;
4273
4274 if (hfs_alloc_tracing) {
4275 OSBacktrace(hdr->backtrace, HFS_ALLOC_BACKTRACE_LEN);
4276 lck_mtx_lock_spin(hfs_alloc_mtx);
4277 LIST_INSERT_HEAD(&hfs_alloc_list, hdr, chain);
4278 hdr->sequence = ++hfs_alloc_sequence;
4279 lck_mtx_unlock(hfs_alloc_mtx);
4280 } else
4281 hdr->chain.le_prev = NULL;
4282 #else
4283 void *ptr;
4284 if (size >= PAGE_SIZE)
4285 ptr = IOMallocAligned(size, PAGE_SIZE);
4286 else
4287 ptr = kalloc(size);
4288 #endif
4289
4290 OSAddAtomic64(size, &hfs_allocated);
4291
4292 return ptr;
4293 }
4294
4295 void hfs_free(void *ptr, size_t size)
4296 {
4297 if (!ptr)
4298 return;
4299
4300 OSAddAtomic64(-(int64_t)size, &hfs_allocated);
4301
4302 #if HFS_MALLOC_DEBUG
4303 struct alloc_debug_header *hdr = ptr + size;
4304
4305 hfs_assert(hdr->magic == HFS_ALLOC_MAGIC);
4306 hfs_assert(hdr->size == size);
4307
4308 hdr->magic = HFS_ALLOC_DEAD;
4309
4310 if (hdr->chain.le_prev) {
4311 lck_mtx_lock_spin(hfs_alloc_mtx);
4312 LIST_REMOVE(hdr, chain);
4313 lck_mtx_unlock(hfs_alloc_mtx);
4314 }
4315
4316 if (size >= PAGE_SIZE)
4317 IOFreeAligned(ptr, size + sizeof(*hdr));
4318 else
4319 kfree(ptr, size + sizeof(*hdr));
4320 #else
4321 if (size >= PAGE_SIZE)
4322 IOFreeAligned(ptr, size);
4323 else
4324 kfree(ptr, size);
4325 #endif
4326 }
4327
4328 void *hfs_mallocz(size_t size)
4329 {
4330 void *ptr = hfs_malloc(size);
4331 bzero(ptr, size);
4332 return ptr;
4333 }
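
//
// Illustrative sketch (not part of the original source): callers must
// pass hfs_free() the same size they allocated, since the allocator
// does not record it for them (and in the HFS_MALLOC_DEBUG build the
// trailer at ptr + size depends on it). "my_ctx" is a hypothetical
// type used only for this example.
//
#if 0
struct my_ctx *ctx = hfs_mallocz(sizeof(*ctx));	// zero-filled allocation
/* ... use ctx ... */
hfs_free(ctx, sizeof(*ctx));	// size must match the allocation
#endif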
4334
4335 struct hfs_sysctl_chain *sysctl_list;
4336
4337 void hfs_sysctl_register(void)
4338 {
4339 struct hfs_sysctl_chain *e = sysctl_list;
4340 while (e) {
4341 sysctl_register_oid(e->oid);
4342 e = e->next;
4343 }
4344 }
4345
4346 void hfs_sysctl_unregister(void)
4347 {
4348 struct hfs_sysctl_chain *e = sysctl_list;
4349 while (e) {
4350 sysctl_unregister_oid(e->oid);
4351 e = e->next;
4352 }
4353 }
4354
4355 void hfs_assert_fail(const char *file, unsigned line, const char *expr)
4356 {
4357 Assert(file, line, expr);
4358 __builtin_unreachable();
4359 }