]> git.saurik.com Git - apple/hfs.git/blob - core/hfs_vfsutils.c
hfs-407.30.1.tar.gz
[apple/hfs.git] / core / hfs_vfsutils.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_vfsutils.c 4.0
29 *
30 * (c) 1997-2002 Apple Inc. All Rights Reserved
31 *
32 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33 *
34 */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/stat.h>
40 #include <sys/mount.h>
41 #include <sys/vm.h>
42 #include <sys/buf.h>
43 #include <sys/ubc.h>
44 #include <sys/unistd.h>
45 #include <sys/utfconv.h>
46 #include <sys/kauth.h>
47 #include <sys/fcntl.h>
48 #include <sys/fsctl.h>
49 #include <sys/mount.h>
50 #include <sys/sysctl.h>
51 #include <kern/clock.h>
52 #include <stdbool.h>
53 #include <miscfs/specfs/specdev.h>
54 #include <libkern/OSAtomic.h>
55 #include <IOKit/IOLib.h>
56
57 /* for parsing boot-args */
58 #include <pexpert/pexpert.h>
59 #include <kern/kalloc.h>
60 #include <kern/zalloc.h>
61
62 #include "hfs_iokit.h"
63 #include "hfs.h"
64 #include "hfs_catalog.h"
65 #include "hfs_dbg.h"
66 #include "hfs_mount.h"
67 #include "hfs_endian.h"
68 #include "hfs_cnode.h"
69 #include "hfs_fsctl.h"
70 #include "hfs_cprotect.h"
71
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "HFSUnicodeWrappers.h"
75
76 /* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
77 extern int hfs_resize_debug;
78
79 static void ReleaseMetaFileVNode(struct vnode *vp);
80 static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
81
82 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
83 static void hfs_thaw_locked(struct hfsmount *hfsmp);
84
85 #define HFS_MOUNT_DEBUG 1
86
87
88 //*******************************************************************************
89 // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
90 // hence are not in the right byte order on little endian machines. It is
91 // the responsibility of the finder and other clients to swap the data.
92 //*******************************************************************************
93
94 //*******************************************************************************
95 // Routine: hfs_MountHFSVolume
96 //
97 //
98 //*******************************************************************************
99 unsigned char hfs_catname[] = "Catalog B-tree";
100 unsigned char hfs_extname[] = "Extents B-tree";
101 unsigned char hfs_vbmname[] = "Volume Bitmap";
102 unsigned char hfs_attrname[] = "Attribute B-tree";
103 unsigned char hfs_startupname[] = "Startup File";
104
105 #if CONFIG_HFS_STD
106 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
107 __unused struct proc *p)
108 {
109 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
110 int error;
111 ByteCount utf8chars;
112 struct cat_desc cndesc;
113 struct cat_attr cnattr;
114 struct cat_fork fork;
115 int newvnode_flags = 0;
116
117 /* Block size must be a multiple of 512 */
118 if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
119 (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
120 return (EINVAL);
121
122 /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
123 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
124 ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
125 return (EINVAL);
126 }
127 hfsmp->hfs_flags |= HFS_STANDARD;
128 /*
129 * The MDB seems OK: transfer info from it into VCB
130 * Note - the VCB starts out clear (all zeros)
131 *
132 */
133 vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
134 vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
135 vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
136 vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
137 vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
138 vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
139 vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
140 vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
141 vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
142 vcb->allocLimit = vcb->totalBlocks;
143 vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
144 vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
145 vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
146 vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
147 vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
148 vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
149 vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
150 vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
151 vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
152 vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
153 bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
154 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
155 vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */
156
157 /* convert hfs encoded name into UTF-8 string */
158 error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
159 /*
160 * When an HFS name cannot be encoded with the current
161 * volume encoding we use MacRoman as a fallback.
162 */
163 if (error || (utf8chars == 0)) {
164 error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
165 /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */
166 if (error) {
167 goto MtVolErr;
168 }
169 }
170
171 hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
172 vcb->vcbVBMIOSize = kHFSBlockSize;
173
174 /* Generate the partition-based AVH location */
175 hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
176 hfsmp->hfs_logical_block_count);
177
178 /* HFS standard is read-only, so just stuff the FS location in here, too */
179 hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
180
181 bzero(&cndesc, sizeof(cndesc));
182 cndesc.cd_parentcnid = kHFSRootParentID;
183 cndesc.cd_flags |= CD_ISMETA;
184 bzero(&cnattr, sizeof(cnattr));
185 cnattr.ca_linkcount = 1;
186 cnattr.ca_mode = S_IFREG;
187 bzero(&fork, sizeof(fork));
188
189 /*
190 * Set up Extents B-tree vnode
191 */
192 cndesc.cd_nameptr = hfs_extname;
193 cndesc.cd_namelen = strlen((char *)hfs_extname);
194 cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
195 fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
196 fork.cf_blocks = fork.cf_size / vcb->blockSize;
197 fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
198 fork.cf_vblocks = 0;
199 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
200 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
201 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
202 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
203 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
204 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
205 cnattr.ca_blocks = fork.cf_blocks;
206
207 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
208 &hfsmp->hfs_extents_vp, &newvnode_flags);
209 if (error) {
210 if (HFS_MOUNT_DEBUG) {
211 printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
212 }
213 goto MtVolErr;
214 }
215 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
216 (KeyCompareProcPtr)CompareExtentKeys));
217 if (error) {
218 if (HFS_MOUNT_DEBUG) {
219 printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
220 }
221 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
222 goto MtVolErr;
223 }
224 hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
225
226 /*
227 * Set up Catalog B-tree vnode...
228 */
229 cndesc.cd_nameptr = hfs_catname;
230 cndesc.cd_namelen = strlen((char *)hfs_catname);
231 cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
232 fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
233 fork.cf_blocks = fork.cf_size / vcb->blockSize;
234 fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
235 fork.cf_vblocks = 0;
236 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
237 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
238 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
239 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
240 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
241 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
242 cnattr.ca_blocks = fork.cf_blocks;
243
244 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
245 &hfsmp->hfs_catalog_vp, &newvnode_flags);
246 if (error) {
247 if (HFS_MOUNT_DEBUG) {
248 printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
249 }
250 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
251 goto MtVolErr;
252 }
253 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
254 (KeyCompareProcPtr)CompareCatalogKeys));
255 if (error) {
256 if (HFS_MOUNT_DEBUG) {
257 printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
258 }
259 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
260 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
261 goto MtVolErr;
262 }
263 hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
264
265 /*
266 * Set up dummy Allocation file vnode (used only for locking bitmap)
267 */
268 cndesc.cd_nameptr = hfs_vbmname;
269 cndesc.cd_namelen = strlen((char *)hfs_vbmname);
270 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
271 bzero(&fork, sizeof(fork));
272 cnattr.ca_blocks = 0;
273
274 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
275 &hfsmp->hfs_allocation_vp, &newvnode_flags);
276 if (error) {
277 if (HFS_MOUNT_DEBUG) {
278 printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
279 }
280 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
281 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
282 goto MtVolErr;
283 }
284 hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
285
286 /* mark the volume dirty (clear clean unmount bit) */
287 vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
288
289 if (error == noErr) {
290 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
291 if (HFS_MOUNT_DEBUG) {
292 printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
293 }
294 }
295
296 if (error == noErr) {
297 /* If the disk isn't write protected.. */
298 if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
299 MarkVCBDirty (vcb); // mark VCB dirty so it will be written
300 }
301 }
302
303 /*
304 * all done with system files so we can unlock now...
305 */
306 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
307 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
308 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
309
310 if (error == noErr) {
311 /* If successful, then we can just return once we've unlocked the cnodes */
312 return error;
313 }
314
315 //-- Release any resources allocated so far before exiting with an error:
316 MtVolErr:
317 hfsUnmount(hfsmp, NULL);
318
319 return (error);
320 }
321
322 #endif
323
324 //*******************************************************************************
325 //
326 // Sanity check Volume Header Block:
327 // Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has
328 // not been endian-swapped and represents the on-disk contents of this sector.
329 // This routine will not change the endianness of vhp block.
330 //
331 //*******************************************************************************
332 OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
333 {
334 u_int16_t signature;
335 u_int16_t hfs_version;
336 u_int32_t blockSize;
337
338 signature = SWAP_BE16(vhp->signature);
339 hfs_version = SWAP_BE16(vhp->version);
340
341 if (signature == kHFSPlusSigWord) {
342 if (hfs_version != kHFSPlusVersion) {
343 printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
344 return (EINVAL);
345 }
346 } else if (signature == kHFSXSigWord) {
347 if (hfs_version != kHFSXVersion) {
348 printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
349 return (EINVAL);
350 }
351 } else {
352 /* Removed printf for invalid HFS+ signature because it gives
353 * false error for UFS root volume
354 */
355 if (HFS_MOUNT_DEBUG) {
356 printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
357 }
358 return (EINVAL);
359 }
360
361 /* Block size must be at least 512 and a power of 2 */
362 blockSize = SWAP_BE32(vhp->blockSize);
363 if (blockSize < 512 || !powerof2(blockSize)) {
364 if (HFS_MOUNT_DEBUG) {
365 printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
366 }
367 return (EINVAL);
368 }
369
370 if (blockSize < hfsmp->hfs_logical_block_size) {
371 if (HFS_MOUNT_DEBUG) {
372 printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
373 blockSize, hfsmp->hfs_logical_block_size);
374 }
375 return (EINVAL);
376 }
377 return 0;
378 }
379
380 //*******************************************************************************
381 // Routine: hfs_MountHFSPlusVolume
382 //
383 //
384 //*******************************************************************************
385
386 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
387 off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
388 {
389 register ExtendedVCB *vcb;
390 struct cat_desc cndesc;
391 struct cat_attr cnattr;
392 struct cat_fork cfork;
393 u_int32_t blockSize;
394 daddr64_t spare_sectors;
395 struct BTreeInfoRec btinfo;
396 u_int16_t signature;
397 u_int16_t hfs_version;
398 int newvnode_flags = 0;
399 int i;
400 OSErr retval;
401 char converted_volname[256];
402 size_t volname_length = 0;
403 size_t conv_volname_length = 0;
404
405 signature = SWAP_BE16(vhp->signature);
406 hfs_version = SWAP_BE16(vhp->version);
407
408 retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
409 if (retval)
410 return retval;
411
412 if (signature == kHFSXSigWord) {
413 /* The in-memory signature is always 'H+'. */
414 signature = kHFSPlusSigWord;
415 hfsmp->hfs_flags |= HFS_X;
416 }
417
418 blockSize = SWAP_BE32(vhp->blockSize);
419 /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
420 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
421 (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
422 if (HFS_MOUNT_DEBUG) {
423 printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
424 }
425 return (EINVAL);
426 }
427
428 /* Make sure we can live with the physical block size. */
429 if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
430 (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
431 if (HFS_MOUNT_DEBUG) {
432 printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
433 hfsmp->hfs_logical_block_size);
434 }
435 return (ENXIO);
436 }
437
438 /*
439 * If allocation block size is less than the physical block size,
440 * same data could be cached in two places and leads to corruption.
441 *
442 * HFS Plus reserves one allocation block for the Volume Header.
443 * If the physical size is larger, then when we read the volume header,
444 * we will also end up reading in the next allocation block(s).
445 * If those other allocation block(s) is/are modified, and then the volume
446 * header is modified, the write of the volume header's buffer will write
447 * out the old contents of the other allocation blocks.
448 *
449 * We assume that the physical block size is same as logical block size.
450 * The physical block size value is used to round down the offsets for
451 * reading and writing the primary and alternate volume headers.
452 *
453 * The same logic to ensure good hfs_physical_block_size is also in
454 * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
455 * later are doing the I/Os using same block size.
456 */
457 if (blockSize < hfsmp->hfs_physical_block_size) {
458 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
459 hfsmp->hfs_log_per_phys = 1;
460 }
461
462 /*
463 * The VolumeHeader seems OK: transfer info from it into VCB
464 * Note - the VCB starts out clear (all zeros)
465 */
466 vcb = HFSTOVCB(hfsmp);
467
468 vcb->vcbSigWord = signature;
469 vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
470 vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
471 vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
472 vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
473 vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
474 vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
475 vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
476 vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
477 vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);
478
479 /* copy 32 bytes of Finder info */
480 bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
481
482 vcb->vcbAlBlSt = 0; /* hfs+ allocation blocks start at first block of volume */
483 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
484 vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */
485
486 /* Now fill in the Extended VCB info */
487 vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
488 vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
489 vcb->allocLimit = vcb->totalBlocks;
490 vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
491 vcb->blockSize = blockSize;
492 vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
493 vcb->localCreateDate = SWAP_BE32(vhp->createDate);
494
495 vcb->hfsPlusIOPosOffset = embeddedOffset;
496
497 /* Default to no free block reserve */
498 vcb->reserveBlocks = 0;
499
500 /*
501 * Update the logical block size in the mount struct
502 * (currently set up from the wrapper MDB) using the
503 * new blocksize value:
504 */
505 hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
506 vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
507
508 /*
509 * Validate and initialize the location of the alternate volume header.
510 *
511 * Note that there may be spare sectors beyond the end of the filesystem that still
512 * belong to our partition.
513 */
514
515 spare_sectors = hfsmp->hfs_logical_block_count -
516 (((daddr64_t)vcb->totalBlocks * blockSize) /
517 hfsmp->hfs_logical_block_size);
518
519 /*
520 * Differentiate between "innocuous" spare sectors and the more unusual
521 * degenerate case:
522 *
523 * *** Innocuous spare sectors exist if:
524 *
525 * A) the number of bytes assigned to the partition (by multiplying logical
526 * block size * logical block count) is greater than the filesystem size
527 * (by multiplying allocation block count and allocation block size)
528 *
529 * and
530 *
531 * B) the remainder is less than the size of a full allocation block's worth of bytes.
532 *
533 * This handles the normal case where there may be a few extra sectors, but the two
534 * are fundamentally in sync.
535 *
536 * *** Degenerate spare sectors exist if:
537 * A) The number of bytes assigned to the partition (by multiplying logical
538 * block size * logical block count) is greater than the filesystem size
539 * (by multiplying allocation block count and block size).
540 *
541 * and
542 *
543 * B) the remainder is greater than a full allocation's block worth of bytes.
544 * In this case, a smaller file system exists in a larger partition.
545 * This can happen in various ways, including when volume is resized but the
546 * partition is yet to be resized. Under this condition, we have to assume that
547 * a partition management software may resize the partition to match
548 * the file system size in the future. Therefore we should update
549 * alternate volume header at two locations on the disk,
550 * a. 1024 bytes before end of the partition
551 * b. 1024 bytes before end of the file system
552 */
553
554 if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
555 /*
556 * Handle the degenerate case above. FS < partition size.
557 * AVH located at 1024 bytes from the end of the partition
558 */
559 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
560 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
561
562 /* AVH located at 1024 bytes from the end of the filesystem */
563 hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
564 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
565 (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
566 }
567 else {
568 /* Innocuous spare sectors; Partition & FS notion are in sync */
569 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
570 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
571
572 hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
573 }
574 if (hfs_resize_debug) {
575 printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
576 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
577 }
578
579 bzero(&cndesc, sizeof(cndesc));
580 cndesc.cd_parentcnid = kHFSRootParentID;
581 cndesc.cd_flags |= CD_ISMETA;
582 bzero(&cnattr, sizeof(cnattr));
583 cnattr.ca_linkcount = 1;
584 cnattr.ca_mode = S_IFREG;
585
586 /*
587 * Set up Extents B-tree vnode
588 */
589 cndesc.cd_nameptr = hfs_extname;
590 cndesc.cd_namelen = strlen((char *)hfs_extname);
591 cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
592
593 cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
594 cfork.cf_new_size= 0;
595 cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
596 cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
597 cfork.cf_vblocks = 0;
598 cnattr.ca_blocks = cfork.cf_blocks;
599 for (i = 0; i < kHFSPlusExtentDensity; i++) {
600 cfork.cf_extents[i].startBlock =
601 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
602 cfork.cf_extents[i].blockCount =
603 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
604 }
605 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
606 &hfsmp->hfs_extents_vp, &newvnode_flags);
607 if (retval)
608 {
609 if (HFS_MOUNT_DEBUG) {
610 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
611 }
612 goto ErrorExit;
613 }
614
615 hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
616
617 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
618 (KeyCompareProcPtr) CompareExtentKeysPlus));
619
620 hfs_unlock(hfsmp->hfs_extents_cp);
621
622 if (retval)
623 {
624 if (HFS_MOUNT_DEBUG) {
625 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
626 }
627 goto ErrorExit;
628 }
629 /*
630 * Set up Catalog B-tree vnode
631 */
632 cndesc.cd_nameptr = hfs_catname;
633 cndesc.cd_namelen = strlen((char *)hfs_catname);
634 cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
635
636 cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
637 cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
638 cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
639 cfork.cf_vblocks = 0;
640 cnattr.ca_blocks = cfork.cf_blocks;
641 for (i = 0; i < kHFSPlusExtentDensity; i++) {
642 cfork.cf_extents[i].startBlock =
643 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
644 cfork.cf_extents[i].blockCount =
645 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
646 }
647 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
648 &hfsmp->hfs_catalog_vp, &newvnode_flags);
649 if (retval) {
650 if (HFS_MOUNT_DEBUG) {
651 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
652 }
653 goto ErrorExit;
654 }
655 hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
656
657 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
658 (KeyCompareProcPtr) CompareExtendedCatalogKeys));
659
660 if (retval) {
661 if (HFS_MOUNT_DEBUG) {
662 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
663 }
664 hfs_unlock(hfsmp->hfs_catalog_cp);
665 goto ErrorExit;
666 }
667 if ((hfsmp->hfs_flags & HFS_X) &&
668 BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
669 if (btinfo.keyCompareType == kHFSBinaryCompare) {
670 hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
671 /* Install a case-sensitive key compare */
672 (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
673 (KeyCompareProcPtr)cat_binarykeycompare);
674 }
675 }
676
677 hfs_unlock(hfsmp->hfs_catalog_cp);
678
679 /*
680 * Set up Allocation file vnode
681 */
682 cndesc.cd_nameptr = hfs_vbmname;
683 cndesc.cd_namelen = strlen((char *)hfs_vbmname);
684 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
685
686 cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
687 cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
688 cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
689 cfork.cf_vblocks = 0;
690 cnattr.ca_blocks = cfork.cf_blocks;
691 for (i = 0; i < kHFSPlusExtentDensity; i++) {
692 cfork.cf_extents[i].startBlock =
693 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
694 cfork.cf_extents[i].blockCount =
695 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
696 }
697 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
698 &hfsmp->hfs_allocation_vp, &newvnode_flags);
699 if (retval) {
700 if (HFS_MOUNT_DEBUG) {
701 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
702 }
703 goto ErrorExit;
704 }
705 hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
706 hfs_unlock(hfsmp->hfs_allocation_cp);
707
708 /*
709 * Set up Attribute B-tree vnode
710 */
711 if (vhp->attributesFile.totalBlocks != 0) {
712 cndesc.cd_nameptr = hfs_attrname;
713 cndesc.cd_namelen = strlen((char *)hfs_attrname);
714 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
715
716 cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
717 cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
718 cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
719 cfork.cf_vblocks = 0;
720 cnattr.ca_blocks = cfork.cf_blocks;
721 for (i = 0; i < kHFSPlusExtentDensity; i++) {
722 cfork.cf_extents[i].startBlock =
723 SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
724 cfork.cf_extents[i].blockCount =
725 SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
726 }
727 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
728 &hfsmp->hfs_attribute_vp, &newvnode_flags);
729 if (retval) {
730 if (HFS_MOUNT_DEBUG) {
731 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
732 }
733 goto ErrorExit;
734 }
735 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
736 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
737 (KeyCompareProcPtr) hfs_attrkeycompare));
738 hfs_unlock(hfsmp->hfs_attribute_cp);
739 if (retval) {
740 if (HFS_MOUNT_DEBUG) {
741 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
742 }
743 goto ErrorExit;
744 }
745
746 /* Initialize vnode for virtual attribute data file that spans the
747 * entire file system space for performing I/O to attribute btree
748 * We hold iocount on the attrdata vnode for the entire duration
749 * of mount (similar to btree vnodes)
750 */
751 retval = init_attrdata_vnode(hfsmp);
752 if (retval) {
753 if (HFS_MOUNT_DEBUG) {
754 printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
755 }
756 goto ErrorExit;
757 }
758 }
759
760 /*
761 * Set up Startup file vnode
762 */
763 if (vhp->startupFile.totalBlocks != 0) {
764 cndesc.cd_nameptr = hfs_startupname;
765 cndesc.cd_namelen = strlen((char *)hfs_startupname);
766 cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
767
768 cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
769 cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
770 cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
771 cfork.cf_vblocks = 0;
772 cnattr.ca_blocks = cfork.cf_blocks;
773 for (i = 0; i < kHFSPlusExtentDensity; i++) {
774 cfork.cf_extents[i].startBlock =
775 SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
776 cfork.cf_extents[i].blockCount =
777 SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
778 }
779 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
780 &hfsmp->hfs_startup_vp, &newvnode_flags);
781 if (retval) {
782 if (HFS_MOUNT_DEBUG) {
783 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
784 }
785 goto ErrorExit;
786 }
787 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
788 hfs_unlock(hfsmp->hfs_startup_cp);
789 }
790
791 /*
792 * Pick up volume name and create date
793 *
794 * Acquiring the volume name should not manipulate the bitmap, only the catalog
795 * btree and possibly the extents overflow b-tree.
796 */
797 retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
798 if (retval) {
799 if (HFS_MOUNT_DEBUG) {
800 printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
801 }
802 goto ErrorExit;
803 }
804 vcb->hfs_itime = cnattr.ca_itime;
805 vcb->volumeNameEncodingHint = cndesc.cd_encoding;
806 bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
807 volname_length = strlen ((const char*)vcb->vcbVN);
808 cat_releasedesc(&cndesc);
809
810 /* Send the volume name down to CoreStorage if necessary */
811 retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
812 if (retval == 0) {
813 (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
814 }
815
816 /* reset retval == 0. we don't care about errors in volname conversion */
817 retval = 0;
818
819 /*
820 * pull in the volume UUID while we are still single-threaded.
821 * This brings the volume UUID into the cached one dangling off of the HFSMP
822 * Otherwise it would have to be computed on first access.
823 */
824 uuid_t throwaway;
825 hfs_getvoluuid (hfsmp, throwaway);
826
827 /*
828 * We now always initiate a full bitmap scan even if the volume is read-only because this is
829 * our only shot to do I/Os of dramaticallly different sizes than what the buffer cache ordinarily
830 * expects. TRIMs will not be delivered to the underlying media if the volume is not
831 * read-write though.
832 */
833 thread_t allocator_scanner;
834 hfsmp->scan_var = 0;
835
836 /* Take the HFS mount mutex and wait on scan_var */
837 hfs_lock_mount (hfsmp);
838
839 kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
840
841 /* Wait until it registers that it's got the appropriate locks (or that it is finished) */
842 while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
843 msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_scan_blocks", 0);
844 }
845
846 hfs_unlock_mount(hfsmp);
847
848 thread_deallocate (allocator_scanner);
849
850 /* mark the volume dirty (clear clean unmount bit) */
851 vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
852 if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
853 hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
854 }
855
856 /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
857 if ((hfsmp->hfs_flags & HFS_X) != 0) {
858 hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
859 }
860
861 //
862 // Check if we need to do late journal initialization. This only
863 // happens if a previous version of MacOS X (or 9) touched the disk.
864 // In that case hfs_late_journal_init() will go re-locate the journal
865 // and journal_info_block files and validate that they're still kosher.
866 //
867 if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask)
868 && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
869 && (hfsmp->jnl == NULL)) {
870
871 retval = hfs_late_journal_init(hfsmp, vhp, args);
872 if (retval != 0) {
873 if (retval == EROFS) {
874 // EROFS is a special error code that means the volume has an external
875 // journal which we couldn't find. in that case we do not want to
876 // rewrite the volume header - we'll just refuse to mount the volume.
877 if (HFS_MOUNT_DEBUG) {
878 printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
879 }
880 retval = EINVAL;
881 goto ErrorExit;
882 }
883
884 hfsmp->jnl = NULL;
885
886 // if the journal failed to open, then set the lastMountedVersion
887 // to be "FSK!" which fsck_hfs will see and force the fsck instead
888 // of just bailing out because the volume is journaled.
889 if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
890 HFSPlusVolumeHeader *jvhp;
891 daddr64_t mdb_offset;
892 struct buf *bp = NULL;
893
894 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
895
896 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
897
898 bp = NULL;
899 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
900 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
901 hfsmp->hfs_physical_block_size, cred, &bp);
902 if (retval == 0) {
903 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
904
905 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
906 printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n");
907 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
908 buf_bwrite(bp);
909 } else {
910 buf_brelse(bp);
911 }
912 bp = NULL;
913 } else if (bp) {
914 buf_brelse(bp);
915 // clear this so the error exit path won't try to use it
916 bp = NULL;
917 }
918 }
919
920 if (HFS_MOUNT_DEBUG) {
921 printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
922 }
923 retval = EINVAL;
924 goto ErrorExit;
925 } else if (hfsmp->jnl) {
926 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
927 }
928 } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
929 struct cat_attr jinfo_attr, jnl_attr;
930
931 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
932 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
933 }
934
935 // if we're here we need to fill in the fileid's for the
936 // journal and journal_info_block.
937 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
938 hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
939 if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
940 printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
941 printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
942 }
943
944 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
945 vcb->vcbAtrb |= kHFSVolumeJournaledMask;
946 }
947
948 if (hfsmp->jnl == NULL) {
949 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
950 }
951 }
952
953 if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
954 {
955 MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
956 }
957
958 if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
959 hfs_pin_fs_metadata(hfsmp);
960 }
961 /*
962 * Distinguish 3 potential cases involving content protection:
963 * 1. mount point bit set; vcbAtrb does not support it. Fail.
964 * 2. mount point bit set; vcbattrb supports it. we're good.
965 * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good.
966 */
967 if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
968 /* Does the mount point support it ? */
969 if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
970 /* Case 1 above */
971 retval = EINVAL;
972 goto ErrorExit;
973 }
974 }
975 else {
976 /* not requested in the mount point. Is it in FS? */
977 if (vcb->vcbAtrb & kHFSContentProtectionMask) {
978 /* Case 3 above */
979 vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
980 }
981 }
982
983 /* At this point, if the mount point flag is set, we can enable it. */
984 if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
985 /* Cases 2+3 above */
986 #if CONFIG_PROTECT
987 /* Get the EAs as needed. */
988 int cperr = 0;
989 struct cp_root_xattr *xattr = NULL;
990 xattr = hfs_malloc(sizeof(*xattr));
991
992 /* go get the EA to get the version information */
993 cperr = cp_getrootxattr (hfsmp, xattr);
994 /*
995 * If there was no EA there, then write one out.
996 * Assuming EA is not present on the root means
997 * this is an erase install or a very old FS
998 */
999
1000 if (cperr == 0) {
1001 /* Have to run a valid CP version. */
1002 if (!cp_is_supported_version(xattr->major_version)) {
1003 cperr = EINVAL;
1004 }
1005 }
1006 else if (cperr == ENOATTR) {
1007 printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
1008 bzero(xattr, sizeof(struct cp_root_xattr));
1009 xattr->major_version = CP_CURRENT_VERS;
1010 xattr->minor_version = CP_MINOR_VERS;
1011 cperr = cp_setrootxattr (hfsmp, xattr);
1012 }
1013
1014 if (cperr) {
1015 hfs_free(xattr, sizeof(*xattr));
1016 retval = EPERM;
1017 goto ErrorExit;
1018 }
1019
1020 /* If we got here, then the CP version is valid. Set it in the mount point */
1021 hfsmp->hfs_running_cp_major_vers = xattr->major_version;
1022 printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
1023 hfsmp->cproot_flags = xattr->flags;
1024 hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
1025 #if HFS_CONFIG_KEY_ROLL
1026 hfsmp->hfs_auto_roll_min_key_os_version = xattr->auto_roll_min_version;
1027 hfsmp->hfs_auto_roll_max_key_os_version = xattr->auto_roll_max_version;
1028 #endif
1029
1030 hfs_free(xattr, sizeof(*xattr));
1031
1032 /*
1033 * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
1034 * Ensure that the boot-arg's value is valid for FILES (not directories),
1035 * since only files are actually protected for now.
1036 */
1037
1038 PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
1039
1040 if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
1041 PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
1042 }
1043
1044 #if HFS_TMPDBG
1045 #if !SECURE_KERNEL
1046 PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
1047 #endif
1048 #endif
1049
1050 if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
1051 hfsmp->default_cp_class = PROTECTION_CLASS_C;
1052 }
1053
1054 #else
1055 /* If CONFIG_PROTECT not built, ignore CP */
1056 vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
1057 #endif
1058 }
1059
1060 /*
1061 * Establish a metadata allocation zone.
1062 */
1063 hfs_metadatazone_init(hfsmp, false);
1064
1065 /*
1066 * Make any metadata zone adjustments.
1067 */
1068 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
1069 /* Keep the roving allocator out of the metadata zone. */
1070 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
1071 vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
1072 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1073 }
1074 } else {
1075 if (vcb->nextAllocation <= 1) {
1076 vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
1077 }
1078 }
1079 vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
1080
1081 /* Setup private/hidden directories for hardlinks. */
1082 hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
1083 hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
1084
1085 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1086 hfs_remove_orphans(hfsmp);
1087
1088 /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
1089 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
1090 {
1091 retval = hfs_erase_unused_nodes(hfsmp);
1092 if (retval) {
1093 if (HFS_MOUNT_DEBUG) {
1094 printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
1095 }
1096
1097 goto ErrorExit;
1098 }
1099 }
1100
1101 /*
1102 * Allow hot file clustering if conditions allow.
1103 */
1104 if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
1105 ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
1106 //
1107 // Wait until the bitmap scan completes before we initializes the
1108 // hotfile area so that we do not run into any issues with the
1109 // bitmap being read while hotfiles is initializing itself. On
1110 // some older/slower machines, without this interlock, the bitmap
1111 // would sometimes get corrupted at boot time.
1112 //
1113 hfs_lock_mount(hfsmp);
1114 while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
1115 (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
1116 }
1117 hfs_unlock_mount(hfsmp);
1118
1119 /*
1120 * Note: at this point we are not allowed to fail the
1121 * mount operation because the HotFile init code
1122 * in hfs_recording_init() will lookup vnodes with
1123 * VNOP_LOOKUP() which hangs vnodes off the mount
1124 * (and if we were to fail, VFS is not prepared to
1125 * clean that up at this point. Since HotFiles are
1126 * optional, this is not a big deal.
1127 */
1128 (void) hfs_recording_init(hfsmp);
1129 }
1130
1131 /* Force ACLs on HFS+ file systems. */
1132 vfs_setextendedsecurity(HFSTOVFS(hfsmp));
1133
1134 /* Enable extent-based extended attributes by default */
1135 hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
1136
1137 return (0);
1138
1139 ErrorExit:
1140 /*
1141 * A fatal error occurred and the volume cannot be mounted, so
1142 * release any resources that we acquired...
1143 */
1144 hfsUnmount(hfsmp, NULL);
1145
1146 if (HFS_MOUNT_DEBUG) {
1147 printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
1148 }
1149 return (retval);
1150 }
1151
1152 static int
1153 _pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
1154 {
1155 int err;
1156
1157 err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
1158 if (err == 0) {
1159 err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL);
1160 hfs_unlock(VTOC(vp));
1161 }
1162
1163 return err;
1164 }
1165
1166 void
1167 hfs_pin_fs_metadata(struct hfsmount *hfsmp)
1168 {
1169 ExtendedVCB *vcb;
1170 int err;
1171
1172 vcb = HFSTOVCB(hfsmp);
1173
1174 err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
1175 if (err != 0) {
1176 printf("hfs: failed to pin extents overflow file %d\n", err);
1177 }
1178 err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
1179 if (err != 0) {
1180 printf("hfs: failed to pin catalog file %d\n", err);
1181 }
1182 err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
1183 if (err != 0) {
1184 printf("hfs: failed to pin bitmap file %d\n", err);
1185 }
1186 err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
1187 if (err != 0) {
1188 printf("hfs: failed to pin extended attr file %d\n", err);
1189 }
1190
1191 hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1);
1192 hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1);
1193
1194 if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
1195 // and hey, if we've got a journal, let's pin that too!
1196 hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize));
1197 }
1198 }
1199
1200 /*
1201 * ReleaseMetaFileVNode
1202 *
1203 * vp L - -
1204 */
1205 static void ReleaseMetaFileVNode(struct vnode *vp)
1206 {
1207 struct filefork *fp;
1208
1209 if (vp && (fp = VTOF(vp))) {
1210 if (fp->fcbBTCBPtr != NULL) {
1211 (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1212 (void) BTClosePath(fp);
1213 hfs_unlock(VTOC(vp));
1214 }
1215
1216 /* release the node even if BTClosePath fails */
1217 vnode_recycle(vp);
1218 vnode_put(vp);
1219 }
1220 }
1221
1222
1223 /*************************************************************
1224 *
1225 * Unmounts a hfs volume.
1226 * At this point vflush() has been called (to dump all non-metadata files)
1227 *
1228 *************************************************************/
1229
1230 int
1231 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
1232 {
1233 /* Get rid of our attribute data vnode (if any). This is done
1234 * after the vflush() during mount, so we don't need to worry
1235 * about any locks.
1236 */
1237 if (hfsmp->hfs_attrdata_vp) {
1238 ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
1239 hfsmp->hfs_attrdata_vp = NULLVP;
1240 }
1241
1242 if (hfsmp->hfs_startup_vp) {
1243 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
1244 hfsmp->hfs_startup_cp = NULL;
1245 hfsmp->hfs_startup_vp = NULL;
1246 }
1247
1248 if (hfsmp->hfs_attribute_vp) {
1249 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
1250 hfsmp->hfs_attribute_cp = NULL;
1251 hfsmp->hfs_attribute_vp = NULL;
1252 }
1253
1254 if (hfsmp->hfs_catalog_vp) {
1255 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
1256 hfsmp->hfs_catalog_cp = NULL;
1257 hfsmp->hfs_catalog_vp = NULL;
1258 }
1259
1260 if (hfsmp->hfs_extents_vp) {
1261 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
1262 hfsmp->hfs_extents_cp = NULL;
1263 hfsmp->hfs_extents_vp = NULL;
1264 }
1265
1266 if (hfsmp->hfs_allocation_vp) {
1267 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
1268 hfsmp->hfs_allocation_cp = NULL;
1269 hfsmp->hfs_allocation_vp = NULL;
1270 }
1271
1272 return (0);
1273 }
1274
1275
1276 /*
1277 * Test if fork has overflow extents.
1278 *
1279 * Returns:
1280 * non-zero - overflow extents exist
1281 * zero - overflow extents do not exist
1282 */
1283 bool overflow_extents(struct filefork *fp)
1284 {
1285 u_int32_t blocks;
1286
1287 //
1288 // If the vnode pointer is NULL then we're being called
1289 // from hfs_remove_orphans() with a faked-up filefork
1290 // and therefore it has to be an HFS+ volume. Otherwise
1291 // we check through the volume header to see what type
1292 // of volume we're on.
1293 //
1294
1295 #if CONFIG_HFS_STD
1296 if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
1297 if (fp->ff_extents[2].blockCount == 0)
1298 return false;
1299
1300 blocks = fp->ff_extents[0].blockCount +
1301 fp->ff_extents[1].blockCount +
1302 fp->ff_extents[2].blockCount;
1303
1304 return fp->ff_blocks > blocks;
1305 }
1306 #endif
1307
1308 if (fp->ff_extents[7].blockCount == 0)
1309 return false;
1310
1311 blocks = fp->ff_extents[0].blockCount +
1312 fp->ff_extents[1].blockCount +
1313 fp->ff_extents[2].blockCount +
1314 fp->ff_extents[3].blockCount +
1315 fp->ff_extents[4].blockCount +
1316 fp->ff_extents[5].blockCount +
1317 fp->ff_extents[6].blockCount +
1318 fp->ff_extents[7].blockCount;
1319
1320 return fp->ff_blocks > blocks;
1321 }
1322
1323 static __attribute__((pure))
1324 boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
1325 {
1326 return (hfsmp->hfs_freeze_state == HFS_FROZEN
1327 || (hfsmp->hfs_freeze_state == HFS_FREEZING
1328 && current_thread() != hfsmp->hfs_freezing_thread));
1329 }
1330
1331 /*
1332 * Lock the HFS global journal lock
1333 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
	thread_t thread = current_thread();

	/* Recursive exclusive acquisition would self-deadlock; fail loudly. */
	if (hfsmp->hfs_global_lockowner == thread) {
		panic ("hfs_lock_global: locking against myself!");
	}

	/*
	 * This check isn't really necessary but this stops us taking
	 * the mount lock in most cases. The essential check is below.
	 */
	if (hfs_is_frozen(hfsmp)) {
		/*
		 * Unfortunately, there is no easy way of getting a notification
		 * for when a process is exiting and it's possible for the exiting
		 * process to get blocked somewhere else. To catch this, we
		 * periodically monitor the frozen process here and thaw if
		 * we spot that it's exiting.
		 */
frozen:
		hfs_lock_mount(hfsmp);

		/* Wake every 500ms to re-check whether the freezer is exiting. */
		struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

		while (hfs_is_frozen(hfsmp)) {
			/* If the freezing process is on its way out, thaw on
			   its behalf rather than waiting forever. */
			if (hfsmp->hfs_freeze_state == HFS_FROZEN
			    && proc_exiting(hfsmp->hfs_freezing_proc)) {
				hfs_thaw_locked(hfsmp);
				break;
			}

			/* Sleep on hfs_freeze_state; the timeout drives the
			   periodic proc_exiting() poll above. */
			msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
			       PWAIT, "hfs_lock_global (frozen)", &ts);
		}
		hfs_unlock_mount(hfsmp);
	}

	/* HFS_SHARED_LOCK */
	if (locktype == HFS_SHARED_LOCK) {
		lck_rw_lock_shared (&hfsmp->hfs_global_lock);
		/* Shared holders are recorded with a sentinel owner, not a thread. */
		hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
	}
	/* HFS_EXCLUSIVE_LOCK */
	else {
		lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
		hfsmp->hfs_global_lockowner = thread;
	}

	/*
	 * We have to check if we're frozen again because of the time
	 * between when we checked and when we took the global lock.
	 */
	if (hfs_is_frozen(hfsmp)) {
		/* Raced with a freeze: drop the lock and wait it out again. */
		hfs_unlock_global(hfsmp);
		goto frozen;
	}

	return 0;
}
1395
1396
1397 /*
1398 * Unlock the HFS global journal lock
1399 */
1400 void
1401 hfs_unlock_global (struct hfsmount *hfsmp)
1402 {
1403 thread_t thread = current_thread();
1404
1405 /* HFS_LOCK_EXCLUSIVE */
1406 if (hfsmp->hfs_global_lockowner == thread) {
1407 hfsmp->hfs_global_lockowner = NULL;
1408 lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1409 }
1410 /* HFS_LOCK_SHARED */
1411 else {
1412 lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1413 }
1414 }
1415
1416 /*
1417 * Lock the HFS mount lock
1418 *
1419 * Note: this is a mutex, not a rw lock!
1420 */
1421 inline
1422 void hfs_lock_mount (struct hfsmount *hfsmp) {
1423 lck_mtx_lock (&(hfsmp->hfs_mutex));
1424 }
1425
1426 /*
1427 * Unlock the HFS mount lock
1428 *
1429 * Note: this is a mutex, not a rw lock!
1430 */
1431 inline
1432 void hfs_unlock_mount (struct hfsmount *hfsmp) {
1433 lck_mtx_unlock (&(hfsmp->hfs_mutex));
1434 }
1435
1436 /*
1437 * Lock HFS system file(s).
1438 *
1439 * This function accepts a @flags parameter which indicates which
1440 * system file locks are required. The value it returns should be
1441 * used in a subsequent call to hfs_systemfile_unlock. The caller
1442 * should treat this value as opaque; it may or may not have a
1443 * relation to the @flags field that is passed in. The *only*
1444 * guarantee that we make is that a value of zero means that no locks
1445 * were taken and that there is no need to call hfs_systemfile_unlock
1446 * (although it is harmless to do so). Recursion is supported but
1447 * care must still be taken to ensure correct lock ordering. Note
1448 * that requests for certain locks may cause other locks to also be
1449 * taken, including locks that are not possible to ask for via the
1450 * @flags parameter.
1451 */
int
hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
{
	/*
	 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
	 */
	if (flags & SFL_CATALOG) {
		/* Skip the lock if this thread already owns the catalog cnode
		 * (recursion) or if there is no catalog cnode at all. */
		if (hfsmp->hfs_catalog_cp
			&& hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
			if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
			}
			if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
			}
			if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
			}
#endif /* HFS_CHECK_LOCK_ORDER */

			(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
			/*
			 * When the catalog file has overflow extents then
			 * also acquire the extents b-tree lock if its not
			 * already requested.
			 */
			if (((flags & SFL_EXTENTS) == 0) &&
			    (hfsmp->hfs_catalog_vp != NULL) &&
			    (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
				flags |= SFL_EXTENTS;
			}
		} else {
			/* Not taken: strip the bit so unlock won't release it. */
			flags &= ~SFL_CATALOG;
		}
	}

	if (flags & SFL_ATTRIBUTE) {
		/* Same recursion/absence handling as for the catalog above. */
		if (hfsmp->hfs_attribute_cp
			&& hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
			if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
			}
			if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
			}
#endif /* HFS_CHECK_LOCK_ORDER */

			(void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
			/*
			 * When the attribute file has overflow extents then
			 * also acquire the extents b-tree lock if its not
			 * already requested.
			 */
			if (((flags & SFL_EXTENTS) == 0) &&
			    (hfsmp->hfs_attribute_vp != NULL) &&
			    (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
				flags |= SFL_EXTENTS;
			}
		} else {
			flags &= ~SFL_ATTRIBUTE;
		}
	}

	if (flags & SFL_STARTUP) {
		if (hfsmp->hfs_startup_cp
			&& hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
#ifdef HFS_CHECK_LOCK_ORDER
			if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
				panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
			}
#endif /* HFS_CHECK_LOCK_ORDER */

			(void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
			/*
			 * When the startup file has overflow extents then
			 * also acquire the extents b-tree lock if its not
			 * already requested.
			 */
			if (((flags & SFL_EXTENTS) == 0) &&
			    (hfsmp->hfs_startup_vp != NULL) &&
			    (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
				flags |= SFL_EXTENTS;
			}
		} else {
			flags &= ~SFL_STARTUP;
		}
	}

	/*
	 * To prevent locks being taken in the wrong order, the extent lock
	 * gets a bitmap lock as well.
	 */
	if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
		if (hfsmp->hfs_allocation_cp) {
			/* Always exclusive, regardless of the requested locktype. */
			(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			/*
			 * The bitmap lock is also grabbed when only extent lock
			 * was requested. Set the bitmap lock bit in the lock
			 * flags which callers will use during unlock.
			 */
			flags |= SFL_BITMAP;
		} else {
			flags &= ~SFL_BITMAP;
		}
	}

	if (flags & SFL_EXTENTS) {
		/*
		 * Since the extents btree lock is recursive we always
		 * need exclusive access.
		 */
		if (hfsmp->hfs_extents_cp) {
			(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

			if (vfs_isswapmount(hfsmp->hfs_mp)) {
				/*
				 * because we may need this lock on the pageout path (if a swapfile allocation
				 * spills into the extents overflow tree), we will grant the holder of this
				 * lock the privilege of dipping into the reserve free pool in order to prevent
				 * a deadlock from occurring if we need those pageouts to complete before we
				 * will make any new pages available on the free list... the deadlock can occur
				 * if this thread needs to allocate memory while this lock is held
				 */
				if (set_vm_privilege(TRUE) == FALSE) {
					/*
					 * indicate that we need to drop vm_privilege
					 * when we unlock
					 */
					flags |= SFL_VM_PRIV;
				}
			}
		} else {
			flags &= ~SFL_EXTENTS;
		}
	}

	/* Return the flags actually taken; callers must pass this exact
	 * value (treated as opaque) to hfs_systemfile_unlock(). */
	return (flags);
}
1592
1593 /*
1594 * unlock HFS system file(s).
1595 */
void
hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
{
	/* A zero cookie from hfs_systemfile_lock() means nothing was taken. */
	if (!flags)
		return;

	struct timeval tv;
	u_int32_t lastfsync;
	int numOfLockedBuffs;

	/* On non-journaled volumes, note the current time for the
	 * b-tree sync heuristics below.  (tv is only read on the
	 * jnl == NULL paths, where it has been initialized here.) */
	if (hfsmp->jnl == NULL) {
		microuptime(&tv);
		lastfsync = tv.tv_sec;
	}
	if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
		hfs_unlock(hfsmp->hfs_startup_cp);
	}
	if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
		/* Without a journal, opportunistically sync the attributes
		 * b-tree when too many metadata buffers are locked or the
		 * last sync was too long ago. */
		if (hfsmp->jnl == NULL) {
			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
			numOfLockedBuffs = count_lock_queue();
			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
			      kMaxSecsForFsync))) {
				hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
			}
		}
		hfs_unlock(hfsmp->hfs_attribute_cp);
	}
	if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
		/* Same opportunistic sync heuristic for the catalog b-tree. */
		if (hfsmp->jnl == NULL) {
			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
			numOfLockedBuffs = count_lock_queue();
			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
			      kMaxSecsForFsync))) {
				hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
			}
		}
		hfs_unlock(hfsmp->hfs_catalog_cp);
	}
	if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
		hfs_unlock(hfsmp->hfs_allocation_cp);
	}
	if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
		/* Same opportunistic sync heuristic for the extents b-tree. */
		if (hfsmp->jnl == NULL) {
			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
			numOfLockedBuffs = count_lock_queue();
			if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
			    ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
			      kMaxSecsForFsync))) {
				hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
			}
		}
		hfs_unlock(hfsmp->hfs_extents_cp);

		if (flags & SFL_VM_PRIV) {
			/*
			 * revoke the vm_privilege we granted this thread
			 * now that we have unlocked the overflow extents
			 */
			set_vm_privilege(FALSE);
		}
	}
}
1661
1662
1663 /*
1664 * RequireFileLock
1665 *
1666 * Check to see if a vnode is locked in the current context
1667 * This is to be used for debugging purposes only!!
1668 */
#if DEBUG
/*
 * Panic unless the given system-file vnode's cnode is locked by the
 * current thread (or @shareable is set, meaning a shared hold by any
 * thread is acceptable).  Debug builds only.
 */
void RequireFileLock(FileReference vp, int shareable)
{
	int locked;

	/* The extents btree and allocation bitmap are always exclusive. */
	if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
	    VTOC(vp)->c_fileid == kHFSAllocationFileID) {
		shareable = 0;
	}

	locked = VTOC(vp)->c_lockowner == current_thread();

	if (!locked && !shareable) {
		switch (VTOC(vp)->c_fileid) {
			case kHFSExtentsFileID:
				panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
				break;
			case kHFSCatalogFileID:
				panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
				break;
			case kHFSAllocationFileID:
				/* The allocation file can hide behind the journal lock. */
				if (VTOHFS(vp)->jnl == NULL)
					panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
				break;
			case kHFSStartupFileID:
				panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
				/* break was missing here; the fallthrough into the
				 * attributes case was masked only because panic()
				 * does not return. */
				break;
			case kHFSAttributesFileID:
				panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
				break;
		}
	}
}
#endif // DEBUG
1704
1705
1706 /*
1707 * There are three ways to qualify for ownership rights on an object:
1708 *
1709 * 1. (a) Your UID matches the cnode's UID.
1710 * (b) The object in question is owned by "unknown"
1711 * 2. (a) Permissions on the filesystem are being ignored and
1712 * your UID matches the replacement UID.
1713 * (b) Permissions on the filesystem are being ignored and
1714 * the replacement UID is "unknown".
1715 * 3. You are root.
1716 *
1717 */
1718 int
1719 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1720 __unused struct proc *p, int invokesuperuserstatus)
1721 {
1722 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1723 (cnode_uid == UNKNOWNUID) || /* [1b] */
1724 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1725 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1726 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1727 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1728 return (0);
1729 } else {
1730 return (EPERM);
1731 }
1732 }
1733
1734
1735 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1736 u_int32_t blockSizeLimit,
1737 u_int32_t baseMultiple) {
1738 /*
1739 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1740 specified limit but still an even multiple of the baseMultiple.
1741 */
1742 int baseBlockCount, blockCount;
1743 u_int32_t trialBlockSize;
1744
1745 if (allocationBlockSize % baseMultiple != 0) {
1746 /*
1747 Whoops: the allocation blocks aren't even multiples of the specified base:
1748 no amount of dividing them into even parts will be a multiple, either then!
1749 */
1750 return 512; /* Hope for the best */
1751 };
1752
1753 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1754 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1755 Even though the former (the result of the loop below) is the larger allocation
1756 block size, the latter is more efficient: */
1757 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1758
1759 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1760 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1761
1762 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1763 trialBlockSize = blockCount * baseMultiple;
1764 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1765 if ((trialBlockSize <= blockSizeLimit) &&
1766 (trialBlockSize % baseMultiple == 0)) {
1767 return trialBlockSize;
1768 };
1769 };
1770 };
1771
1772 /* Note: we should never get here, since blockCount = 1 should always work,
1773 but this is nice and safe and makes the compiler happy, too ... */
1774 return 512;
1775 }
1776
1777
1778 u_int32_t
1779 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1780 struct cat_attr *fattr, struct cat_fork *forkinfo)
1781 {
1782 struct hfsmount * hfsmp;
1783 struct cat_desc jdesc;
1784 int lockflags;
1785 int error;
1786
1787 if (vcb->vcbSigWord != kHFSPlusSigWord)
1788 return (0);
1789
1790 hfsmp = VCBTOHFS(vcb);
1791
1792 memset(&jdesc, 0, sizeof(struct cat_desc));
1793 jdesc.cd_parentcnid = kRootDirID;
1794 jdesc.cd_nameptr = (const u_int8_t *)name;
1795 jdesc.cd_namelen = strlen(name);
1796
1797 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1798 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1799 hfs_systemfile_unlock(hfsmp, lockflags);
1800
1801 if (error == 0) {
1802 return (fattr->ca_fileid);
1803 } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1804 return (0);
1805 }
1806
1807 return (0); /* XXX what callers expect on an error */
1808 }
1809
1810
1811 /*
1812 * On HFS Plus Volumes, there can be orphaned files or directories
1813 * These are files or directories that were unlinked while busy.
1814 * If the volume was not cleanly unmounted then some of these may
1815 * have persisted and need to be removed.
1816 */
void
hfs_remove_orphans(struct hfsmount * hfsmp)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	struct HFSPlusCatalogFile filerec;
	struct HFSPlusCatalogKey * keyp;
	struct proc *p = current_proc();
	FCB *fcb;
	ExtendedVCB *vcb;
	char filename[32];
	char tempname[32];
	size_t namelen;
	cat_cookie_t cookie;
	int catlock = 0;		/* nonzero while the system-file locks are held */
	int catreserve = 0;		/* nonzero while catalog space is reserved */
	bool started_tr = false;	/* true while a journal transaction is open */
	int lockflags;
	int result;
	int orphaned_files = 0;
	int orphaned_dirs = 0;

	bzero(&cookie, sizeof(cookie));

	/* Only scan for orphans once per mount. */
	if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
		return;

	vcb = HFSTOVCB(hfsmp);
	fcb = VTOF(hfsmp->hfs_catalog_vp);

	/* Each BTIterateRecord call below fills filerec with one catalog record. */
	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	iterator = hfs_mallocz(sizeof(*iterator));

	/* Build a key to "temp" */
	keyp = (HFSPlusCatalogKey*)&iterator->key;
	keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
	keyp->nodeName.length = 4;  /* "temp" */
	keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
	keyp->nodeName.unicode[0] = 't';
	keyp->nodeName.unicode[1] = 'e';
	keyp->nodeName.unicode[2] = 'm';
	keyp->nodeName.unicode[3] = 'p';

	/*
	 * Position the iterator just before the first real temp file/dir.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	(void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
	hfs_systemfile_unlock(hfsmp, lockflags);

	/* Visit all the temp files/dirs in the HFS+ private directory. */
	for (;;) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
		result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (result)
			break;
		/* Stop once we walk out of the private (hard-link) directory. */
		if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
			break;

		/* Convert the on-disk Unicode name to UTF-8 for comparison. */
		(void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
		                      (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);

		(void) snprintf(tempname, sizeof(tempname), "%s%d",
				HFS_DELETE_PREFIX, filerec.fileID);

		/*
		 * Delete all files (and directories) named "tempxxx",
		 * where xxx is the file's cnid in decimal.
		 *
		 */
		if (bcmp(tempname, filename, namelen + 1) != 0)
			continue;

		/* Fake in-memory cnode/forks so the truncate machinery can run. */
		struct filefork dfork;
		struct filefork rfork;
		struct cnode cnode;
		int mode = 0;

		bzero(&dfork, sizeof(dfork));
		bzero(&rfork, sizeof(rfork));
		bzero(&cnode, sizeof(cnode));

		if (hfs_start_transaction(hfsmp) != 0) {
			printf("hfs_remove_orphans: failed to start transaction\n");
			goto exit;
		}
		started_tr = true;

		/*
		 * Reserve some space in the Catalog file.
		 */
		if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
			printf("hfs_remove_orphans: cat_preflight failed\n");
			goto exit;
		}
		catreserve = 1;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		catlock = 1;

		/* Build a fake cnode */
		cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
		                &dfork.ff_data, &rfork.ff_data);
		cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
		cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
		cnode.c_desc.cd_namelen = namelen;
		cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
		cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;

		/* Position iterator at previous entry */
		/* (so iteration resumes correctly after this record is deleted) */
		if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
		                    NULL, NULL) != 0) {
			break;
		}

		/* Truncate the file to zero (both forks) */
		if (dfork.ff_blocks > 0) {
			u_int64_t fsize;

			dfork.ff_cp = &cnode;
			cnode.c_datafork = &dfork;
			cnode.c_rsrcfork = NULL;
			fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
			/* Shrink in HFS_BIGFILE_SIZE chunks to bound transaction size. */
			while (fsize > 0) {
				if (fsize > HFS_BIGFILE_SIZE) {
					fsize -= HFS_BIGFILE_SIZE;
				} else {
					fsize = 0;
				}

				if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
								  cnode.c_attr.ca_fileid, false) != 0) {
					printf("hfs: error truncating data fork!\n");
					break;
				}

				//
				// if we're iteratively truncating this file down,
				// then end the transaction and start a new one so
				// that no one transaction gets too big.
				//
				if (fsize > 0) {
					/* Drop system file locks before starting
					 * another transaction to preserve lock order.
					 */
					hfs_systemfile_unlock(hfsmp, lockflags);
					catlock = 0;
					hfs_end_transaction(hfsmp);

					if (hfs_start_transaction(hfsmp) != 0) {
						started_tr = false;
						goto exit;
					}
					lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
					catlock = 1;
				}
			}
		}

		/* Resource fork is truncated in a single step. */
		if (rfork.ff_blocks > 0) {
			rfork.ff_cp = &cnode;
			cnode.c_datafork = NULL;
			cnode.c_rsrcfork = &rfork;
			if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
				printf("hfs: error truncating rsrc fork!\n");
				break;
			}
		}

		// Deal with extended attributes
		if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
			// hfs_removeallattr uses its own transactions
			hfs_systemfile_unlock(hfsmp, lockflags);
			catlock = false;
			hfs_end_transaction(hfsmp);

			hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);

			/* hfs_removeallattr may have left a transaction open or not. */
			if (!started_tr) {
				if (hfs_start_transaction(hfsmp) != 0) {
					printf("hfs_remove_orphans: failed to start transaction\n");
					goto exit;
				}
				started_tr = true;
			}

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
			catlock = 1;
		}

		/* Remove the file or folder record from the Catalog */
		if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
			printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
			hfs_systemfile_unlock(hfsmp, lockflags);
			catlock = 0;
			hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			break;
		}

		mode = cnode.c_attr.ca_mode & S_IFMT;

		if (mode == S_IFDIR) {
			orphaned_dirs++;
		}
		else {
			orphaned_files++;
		}

		/* Update parent and volume counts */
		hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
		if (mode == S_IFDIR) {
			DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
		}

		(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
				 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);

		/* Drop locks and end the transaction */
		hfs_systemfile_unlock(hfsmp, lockflags);
		cat_postflight(hfsmp, &cookie, p);
		catlock = catreserve = 0;

		/*
		   Now that Catalog is unlocked, update the volume info, making
		   sure to differentiate between files and directories
		 */
		if (mode == S_IFDIR) {
			hfs_volupdate(hfsmp, VOL_RMDIR, 0);
		}
		else{
			hfs_volupdate(hfsmp, VOL_RMFILE, 0);
		}

		hfs_end_transaction(hfsmp);
		started_tr = false;
	} /* end for */

exit:
	/* Common unwind: release whatever is still held, in reverse order. */

	if (orphaned_files > 0 || orphaned_dirs > 0)
		printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
	if (catlock) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (catreserve) {
		cat_postflight(hfsmp, &cookie, p);
	}
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}

	hfs_free(iterator, sizeof(*iterator));
	/* Mark the scan done so remounts/re-entry skip it. */
	hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
}
2075
2076
2077 /*
2078 * This will return the correct logical block size for a given vnode.
2079 * For most files, it is the allocation block size, for meta data like
2080 * BTrees, this is kept as part of the BTree private nodeSize
2081 */
2082 u_int32_t
2083 GetLogicalBlockSize(struct vnode *vp)
2084 {
2085 u_int32_t logBlockSize;
2086
2087 hfs_assert(vp != NULL);
2088
2089 /* start with default */
2090 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
2091
2092 if (vnode_issystem(vp)) {
2093 if (VTOF(vp)->fcbBTCBPtr != NULL) {
2094 BTreeInfoRec bTreeInfo;
2095
2096 /*
2097 * We do not lock the BTrees, because if we are getting block..then the tree
2098 * should be locked in the first place.
2099 * We just want the nodeSize wich will NEVER change..so even if the world
2100 * is changing..the nodeSize should remain the same. Which argues why lock
2101 * it in the first place??
2102 */
2103
2104 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
2105
2106 logBlockSize = bTreeInfo.nodeSize;
2107
2108 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2109 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2110 }
2111 }
2112
2113 hfs_assert(logBlockSize > 0);
2114
2115 return logBlockSize;
2116 }
2117
2118 #if HFS_SPARSE_DEV
/*
 * For sparse-image-backed volumes, compute how many of this volume's
 * allocation blocks the backing store can actually still hold.
 * Returns false when there is no backing vnode (caller should fall back
 * to its own free count) or statfs fails; returns true with *pfree_blks
 * set otherwise.
 */
static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
{
	struct vfsstatfs *vfsp; /* 272 bytes */
	uint64_t vfreeblks;
	struct timeval now;

	hfs_lock_mount(hfsmp);

	vnode_t backing_vp = hfsmp->hfs_backingvp;
	if (!backing_vp) {
		/* Not a sparse-backed volume. */
		hfs_unlock_mount(hfsmp);
		return false;
	}

	// usecount is not enough; we need iocount
	if (vnode_get(backing_vp)) {
		/* Backing vnode is unavailable: report zero free blocks. */
		hfs_unlock_mount(hfsmp);
		*pfree_blks = 0;
		return true;
	}

	/* Snapshot per-mount values while the mount lock is still held. */
	uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
	uint32_t bandblks	= hfsmp->hfs_sparsebandblks;
	uint64_t maxblks	= hfsmp->hfs_backingfs_maxblocks;

	hfs_unlock_mount(hfsmp);

	mount_t backingfs_mp = vnode_mount(backing_vp);

	/* Refresh the backing volume's statfs data at most once per second. */
	microtime(&now);
	if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
		vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
		hfsmp->hfs_last_backingstatfs = now.tv_sec;
	}

	if (!(vfsp = vfs_statfs(backingfs_mp))) {
		vnode_put(backing_vp);
		return false;
	}

	vfreeblks = vfsp->f_bavail;
	/* Normalize block count if needed. */
	if (vfsp->f_bsize != hfsmp->blockSize)
		vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
	/* Keep the sparse band reserve out of the reported free count. */
	if (vfreeblks > bandblks)
		vfreeblks -= bandblks;
	else
		vfreeblks = 0;

	/*
	 * Take into account any delayed allocations.  It is not
	 * certain what the original reason for the "2 *" is.  Most
	 * likely it is to allow for additional requirements in the
	 * host file system and metadata required by disk images.  The
	 * number of loaned blocks is likely to be small and we will
	 * stop using them as we get close to the limit.
	 */
	loanedblks = 2 * loanedblks;
	if (vfreeblks > loanedblks)
		vfreeblks -= loanedblks;
	else
		vfreeblks = 0;

	/* Honor any configured cap on backing-store consumption. */
	if (maxblks)
		vfreeblks = MIN(vfreeblks, maxblks);

	vnode_put(backing_vp);

	*pfree_blks = vfreeblks;

	return true;
}
2191 #endif
2192
2193 u_int32_t
2194 hfs_free_cnids(struct hfsmount * hfsmp)
2195 {
2196 return HFS_MAX_FILES - hfsmp->hfs_filecount - hfsmp->hfs_dircount;
2197 }
2198
2199 u_int32_t
2200 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2201 {
2202 u_int32_t freeblks;
2203 u_int32_t rsrvblks;
2204 u_int32_t loanblks;
2205
2206 /*
2207 * We don't bother taking the mount lock
2208 * to look at these values since the values
2209 * themselves are each updated atomically
2210 * on aligned addresses.
2211 */
2212 freeblks = hfsmp->freeBlocks;
2213 rsrvblks = hfsmp->reserveBlocks;
2214 loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2215 if (wantreserve) {
2216 if (freeblks > rsrvblks)
2217 freeblks -= rsrvblks;
2218 else
2219 freeblks = 0;
2220 }
2221 if (freeblks > loanblks)
2222 freeblks -= loanblks;
2223 else
2224 freeblks = 0;
2225
2226 #if HFS_SPARSE_DEV
2227 /*
2228 * When the underlying device is sparse, check the
2229 * available space on the backing store volume.
2230 */
2231 uint64_t vfreeblks;
2232 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2233 freeblks = MIN(freeblks, vfreeblks);
2234 #endif /* HFS_SPARSE_DEV */
2235
2236 return (freeblks);
2237 }
2238
2239 /*
2240 * Map HFS Common errors (negative) to BSD error codes (positive).
2241 * Positive errors (ie BSD errors) are passed through unchanged.
2242 */
2243 short MacToVFSError(OSErr err)
2244 {
2245 if (err >= 0)
2246 return err;
2247
2248 /* BSD/VFS internal errnos */
2249 switch (err) {
2250 case HFS_ERESERVEDNAME: /* -8 */
2251 return err;
2252 }
2253
2254 switch (err) {
2255 case dskFulErr: /* -34 */
2256 case btNoSpaceAvail: /* -32733 */
2257 return ENOSPC;
2258 case fxOvFlErr: /* -32750 */
2259 return EOVERFLOW;
2260
2261 case btBadNode: /* -32731 */
2262 return EIO;
2263
2264 case memFullErr: /* -108 */
2265 return ENOMEM; /* +12 */
2266
2267 case cmExists: /* -32718 */
2268 case btExists: /* -32734 */
2269 return EEXIST; /* +17 */
2270
2271 case cmNotFound: /* -32719 */
2272 case btNotFound: /* -32735 */
2273 return ENOENT; /* 28 */
2274
2275 case cmNotEmpty: /* -32717 */
2276 return ENOTEMPTY; /* 66 */
2277
2278 case cmFThdDirErr: /* -32714 */
2279 return EISDIR; /* 21 */
2280
2281 case fxRangeErr: /* -32751 */
2282 return ERANGE;
2283
2284 case bdNamErr: /* -37 */
2285 return ENAMETOOLONG; /* 63 */
2286
2287 case paramErr: /* -50 */
2288 case fileBoundsErr: /* -1309 */
2289 return EINVAL; /* +22 */
2290
2291 case fsBTBadNodeSize:
2292 return ENXIO;
2293
2294 default:
2295 return EIO; /* +5 */
2296 }
2297 }
2298
2299
2300 /*
2301 * Find the current thread's directory hint for a given index.
2302 *
2303 * Requires an exclusive lock on directory cnode.
2304 *
2305 * Use detach if the cnode lock must be dropped while the hint is still active.
2306 */
directoryhint_t *
hfs_getdirhint(struct cnode *dcp, int index, int detach)
{
	struct timeval tv;
	directoryhint_t *hint;
	boolean_t need_remove, need_init;
	const u_int8_t * name;

	microuptime(&tv);

	/*
	 * Look for an existing hint first.  If not found, create a new one (when
	 * the list is not full) or recycle the oldest hint.  Since new hints are
	 * always added to the head of the list, the last hint is always the
	 * oldest.
	 */
	TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
		if (hint->dh_index == index)
			break;
	}
	if (hint != NULL) { /* found an existing hint */
		need_init = false;
		need_remove = true;
	} else { /* cannot find an existing hint */
		need_init = true;
		if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
			/* Create a default directory hint */
			hint = hfs_zalloc(HFS_DIRHINT_ZONE);
			++dcp->c_dirhintcnt;
			need_remove = false;
		} else { /* recycle the last (i.e., the oldest) hint */
			hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
			/* Release the recycled hint's name buffer, if any. */
			if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
			    (name = hint->dh_desc.cd_nameptr)) {
				hint->dh_desc.cd_nameptr = NULL;
				hint->dh_desc.cd_namelen = 0;
				hint->dh_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename((const char *)name);
			}
			need_remove = true;
		}
	}

	if (need_remove)
		TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);

	if (detach)
		--dcp->c_dirhintcnt;	/* caller owns the hint while detached */
	else
		TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);	/* newest at head */

	if (need_init) {
		/* Reset a fresh (or recycled) hint for this directory index. */
		hint->dh_index = index;
		hint->dh_desc.cd_flags = 0;
		hint->dh_desc.cd_encoding = 0;
		hint->dh_desc.cd_namelen = 0;
		hint->dh_desc.cd_nameptr = NULL;
		hint->dh_desc.cd_parentcnid = dcp->c_fileid;
		hint->dh_desc.cd_hint = dcp->c_childhint;
		hint->dh_desc.cd_cnid = 0;
	}
	hint->dh_time = tv.tv_sec;	/* timestamp used for stale-hint reclaim */
	return (hint);
}
2371
2372 /*
2373 * Release a single directory hint.
2374 *
2375 * Requires an exclusive lock on directory cnode.
2376 */
2377 void
2378 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2379 {
2380 const u_int8_t * name;
2381 directoryhint_t *hint;
2382
2383 /* Check if item is on list (could be detached) */
2384 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2385 if (hint == relhint) {
2386 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2387 --dcp->c_dirhintcnt;
2388 break;
2389 }
2390 }
2391 name = relhint->dh_desc.cd_nameptr;
2392 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2393 relhint->dh_desc.cd_nameptr = NULL;
2394 relhint->dh_desc.cd_namelen = 0;
2395 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2396 vfs_removename((const char *)name);
2397 }
2398 hfs_zfree(relhint, HFS_DIRHINT_ZONE);
2399 }
2400
2401 /*
2402 * Release directory hints for given directory
2403 *
2404 * Requires an exclusive lock on directory cnode.
2405 */
2406 void
2407 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2408 {
2409 struct timeval tv;
2410 directoryhint_t *hint, *prev;
2411 const u_int8_t * name;
2412
2413 if (stale_hints_only)
2414 microuptime(&tv);
2415
2416 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2417 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2418 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2419 break; /* stop here if this entry is too new */
2420 name = hint->dh_desc.cd_nameptr;
2421 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2422 hint->dh_desc.cd_nameptr = NULL;
2423 hint->dh_desc.cd_namelen = 0;
2424 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2425 vfs_removename((const char *)name);
2426 }
2427 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2428 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2429 hfs_zfree(hint, HFS_DIRHINT_ZONE);
2430 --dcp->c_dirhintcnt;
2431 }
2432 }
2433
2434 /*
2435 * Insert a detached directory hint back into the list of dirhints.
2436 *
2437 * Requires an exclusive lock on directory cnode.
2438 */
2439 void
2440 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2441 {
2442 directoryhint_t *test;
2443
2444 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2445 if (test == hint)
2446 panic("hfs_insertdirhint: hint %p already on list!", hint);
2447 }
2448
2449 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2450 ++dcp->c_dirhintcnt;
2451 }
2452
2453 /*
2454 * Perform a case-insensitive compare of two UTF-8 filenames.
2455 *
2456 * Returns 0 if the strings match.
2457 */
2458 int
2459 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2460 {
2461 u_int16_t *ustr1, *ustr2;
2462 size_t ulen1, ulen2;
2463 size_t maxbytes;
2464 int cmp = -1;
2465
2466 if (len1 != len2)
2467 return (cmp);
2468
2469 maxbytes = kHFSPlusMaxFileNameChars << 1;
2470 ustr1 = hfs_malloc(maxbytes << 1);
2471 ustr2 = ustr1 + (maxbytes >> 1);
2472
2473 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2474 goto out;
2475 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2476 goto out;
2477
2478 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2479 out:
2480 hfs_free(ustr1, maxbytes << 1);
2481 return (cmp);
2482 }
2483
/* Context passed to journal_open_cb while scanning for an external journal. */
typedef struct jopen_cb_info {
	mount_t mp;		/* mount we are opening the journal for */
	off_t jsize;		/* journal size in bytes (from the info block) */
	char *desired_uuid;	/* uuid to match; empty string accepts any device */
	struct vnode *jvp;	/* out: opened journal device vnode (NULL if none) */
	size_t blksize;		/* logical block size for journal_is_clean() */
	int need_clean;		/* in: nonzero to reject devices with a dirty journal */
	int need_init;		/* out: set when the journal must be initialized */
} jopen_cb_info;
2493
/*
 * Media-iteration callback: try to open bsd_dev_name as this volume's
 * external journal device.  Returns 1 to keep iterating (device unusable)
 * or 0 to stop (device accepted; ji->jvp holds it with an iocount and
 * spec_open done).
 */
static int
journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
{
	jopen_cb_info *ji = (jopen_cb_info *)arg;
	char bsd_name[256];
	int error;

	/* Build "/dev/<bsd_dev_name>". */
	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);

	if ((error = vnode_lookup(bsd_name, VNODE_LOOKUP_NOFOLLOW, &ji->jvp,
							  vfs_context_kernel()))) {
		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
		return 1;   // keep iterating
	}

	struct vnop_open_args oargs = {
		.a_vp = ji->jvp,
		.a_mode = FREAD | FWRITE,
		.a_context = vfs_context_kernel(),
	};

	/* Open the device; on failure drop the iocount and move on. */
	if (spec_open(&oargs)) {
		vnode_put(ji->jvp);
		ji->jvp = NULL;
		return 1;
	}

	// if the journal is dirty and we didn't specify a desired
	// journal device uuid, then do not use the journal.  but
	// if the journal is just invalid (e.g. it hasn't been
	// initialized) then just set the need_init flag.
	if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
		error = journal_is_clean(ji->jvp, 0, ji->jsize,
								 (void *)1, ji->blksize);
		if (error == EBUSY) {
			/* Dirty journal on an unnamed device: close and keep looking. */
			struct vnop_close_args cargs = {
				.a_vp = ji->jvp,
				.a_fflag = FREAD | FWRITE,
				.a_context = vfs_context_kernel()
			};
			spec_close(&cargs);
			vnode_put(ji->jvp);
			ji->jvp = NULL;
			return 1;   // keep iterating
		} else if (error == EINVAL) {
			ji->need_init = 1;
		}
	}

	/* Remember which device we settled on when none was requested. */
	if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
		strlcpy(ji->desired_uuid, uuid_str, 128);
	}
	vnode_setmountedon(ji->jvp);
	return 0;   // stop iterating
}
2550
2551 static vnode_t
2552 open_journal_dev(mount_t mp,
2553 const char *vol_device,
2554 int need_clean,
2555 char *uuid_str,
2556 char *machine_serial_num,
2557 off_t jsize,
2558 size_t blksize,
2559 int *need_init)
2560 {
2561 int retry_counter=0;
2562 jopen_cb_info ji;
2563
2564 ji.mp = mp;
2565 ji.jsize = jsize;
2566 ji.desired_uuid = uuid_str;
2567 ji.jvp = NULL;
2568 ji.blksize = blksize;
2569 ji.need_clean = need_clean;
2570 ji.need_init = 0;
2571
2572 // if (uuid_str[0] == '\0') {
2573 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2574 // } else {
2575 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2576 // }
2577 while (ji.jvp == NULL && retry_counter++ < 4) {
2578 if (retry_counter > 1) {
2579 if (uuid_str[0]) {
2580 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2581 } else {
2582 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2583 }
2584 delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2585 }
2586
2587 hfs_iterate_media_with_content(EXTJNL_CONTENT_TYPE_UUID,
2588 journal_open_cb, &ji);
2589 }
2590
2591 if (ji.jvp == NULL) {
2592 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2593 vol_device, uuid_str, machine_serial_num);
2594 }
2595
2596 *need_init = ji.need_init;
2597
2598 return ji.jvp;
2599 }
2600
2601 void hfs_close_jvp(hfsmount_t *hfsmp)
2602 {
2603 if (!hfsmp || !hfsmp->jvp || hfsmp->jvp == hfsmp->hfs_devvp)
2604 return;
2605
2606 vnode_clearmountedon(hfsmp->jvp);
2607 struct vnop_close_args cargs = {
2608 .a_vp = hfsmp->jvp,
2609 .a_fflag = FREAD | FWRITE,
2610 .a_context = vfs_context_kernel()
2611 };
2612 spec_close(&cargs);
2613 vnode_put(hfsmp->jvp);
2614 hfsmp->jvp = NULL;
2615 }
2616
/*
 * Bring up the journal during early mount: read the journal info block,
 * open the journal device (in-volume or external), and either create or
 * open/replay the journal.  Returns 0 on success or a BSD errno.
 */
int
hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
					   void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
					   HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
{
	JournalInfoBlock *jibp;
	struct buf *jinfo_bp, *bp;
	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	int retval, write_jibp = 0;
	uint32_t blksize = hfsmp->hfs_logical_block_size;
	struct vnode *devvp;
	struct hfs_mount_args *args = _args;
	u_int32_t jib_flags;
	u_int64_t jib_offset;
	u_int64_t jib_size;
	const char *dev_name;

	devvp = hfsmp->hfs_devvp;
	dev_name = vnode_getname_printable(devvp);

	/* Pick up journal tuning from extended mount args, if given. */
	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
		arg_flags = args->journal_flags;
		arg_tbufsz = args->journal_tbuffer_size;
	}

	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;

	/* Read the journal info block; on-disk fields are big-endian. */
	jinfo_bp = NULL;
	retval = (int)buf_meta_bread(devvp,
	    (daddr64_t)((embeddedOffset/blksize) +
	    ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
	    hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
	if (retval) {
		if (jinfo_bp) {
			buf_brelse(jinfo_bp);
		}
		goto cleanup_dev_name;
	}

	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	jib_flags = SWAP_BE32(jibp->flags);
	jib_size = SWAP_BE64(jibp->size);

	if (jib_flags & kJIJournalInFSMask) {
		/* Journal lives inside the volume on the same device. */
		hfsmp->jvp = hfsmp->hfs_devvp;
		jib_offset = SWAP_BE64(jibp->offset);
	} else {
		/* Journal lives on an external partition. */
		int need_init=0;

		// if the volume was unmounted cleanly then we'll pick any
		// available external journal partition
		//
		if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
			*((char *)&jibp->ext_jnl_uuid[0]) = '\0';
		}

		hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
									  dev_name,
									  !(jib_flags & kJIJournalNeedInitMask),
									  (char *)&jibp->ext_jnl_uuid[0],
									  (char *)&jibp->machine_serial_num[0],
									  jib_size,
									  hfsmp->hfs_logical_block_size,
									  &need_init);
		if (hfsmp->jvp == NULL) {
			buf_brelse(jinfo_bp);
			retval = EROFS;
			goto cleanup_dev_name;
		} else {
			/* Record which machine last attached this external journal. */
			if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
				strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
			}
		}

		jib_offset = 0;
		write_jibp = 1;		/* info block changed; flush it below */
		if (need_init) {
			jib_flags |= kJIJournalNeedInitMask;
		}
	}

	// save this off for the hack-y check in hfs_remove()
	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	hfsmp->jnl_size  = jib_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
		// if the file system is read-only, check if the journal is empty.
		// if it is, then we can allow the mount.  otherwise we have to
		// return failure.
		retval = journal_is_clean(hfsmp->jvp,
								  jib_offset + embeddedOffset,
								  jib_size,
								  devvp,
								  hfsmp->hfs_logical_block_size);

		hfsmp->jnl = NULL;

		buf_brelse(jinfo_bp);

		if (retval) {
			const char *name = vnode_getname_printable(devvp);
			printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
			       name);
			vnode_putname_printable(name);
		}

		goto cleanup_dev_name;
	}

	if (jib_flags & kJIJournalNeedInitMask) {
		/* Brand-new (or re-homed) journal: create it from scratch. */
		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
			   jib_offset + embeddedOffset, jib_size);
		hfsmp->jnl = journal_create(hfsmp->jvp,
									jib_offset + embeddedOffset,
									jib_size,
									devvp,
									blksize,
									arg_flags,
									arg_tbufsz,
									hfs_sync_metadata, hfsmp->hfs_mp,
									hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

		// no need to start a transaction here... if this were to fail
		// we'd just re-init it on the next mount.
		jib_flags &= ~kJIJournalNeedInitMask;
		jibp->flags  = SWAP_BE32(jib_flags);
		/* buf_bwrite consumes the buffer; don't touch it afterwards. */
		buf_bwrite(jinfo_bp);
		jinfo_bp = NULL;
		jibp     = NULL;
	} else {
		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
		//	   jib_offset + embeddedOffset,
		//	   jib_size, SWAP_BE32(vhp->blockSize));

		/* Existing journal: open it (this replays it if needed). */
		hfsmp->jnl = journal_open(hfsmp->jvp,
								  jib_offset + embeddedOffset,
								  jib_size,
								  devvp,
								  blksize,
								  arg_flags,
								  arg_tbufsz,
								  hfs_sync_metadata, hfsmp->hfs_mp,
								  hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

		if (write_jibp) {
			buf_bwrite(jinfo_bp);
		} else {
			buf_brelse(jinfo_bp);
		}
		jinfo_bp = NULL;
		jibp     = NULL;

		if (hfsmp->jnl && mdbp) {
			// reload the mdb because it could have changed
			// if the journal had to be replayed.
			if (mdb_offset == 0) {
				mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
			}
			bp = NULL;
			retval = (int)buf_meta_bread(devvp,
					HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
					hfsmp->hfs_physical_block_size, cred, &bp);
			if (retval) {
				if (bp) {
					buf_brelse(bp);
				}
				printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
					   retval);
				goto cleanup_dev_name;
			}
			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
			buf_brelse(bp);
			bp = NULL;
		}
	}

	// if we expected the journal to be there and we couldn't
	// create it or open it then we have to bail out.
	if (hfsmp->jnl == NULL) {
		printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
		retval = EINVAL;
		goto cleanup_dev_name;
	}

	retval = 0;

cleanup_dev_name:
	vnode_putname_printable(dev_name);
	return retval;
}
2811
2812
2813 //
2814 // This function will go and re-locate the .journal_info_block and
2815 // the .journal files in case they moved (which can happen if you
2816 // run Norton SpeedDisk). If we fail to find either file we just
2817 // disable journaling for this volume and return. We turn off the
2818 // journaling bit in the vcb and assume it will get written to disk
2819 // later (if it doesn't on the next mount we'd do the same thing
2820 // again which is harmless). If we disable journaling we don't
2821 // return an error so that the volume is still mountable.
2822 //
2823 // If the info we find for the .journal_info_block and .journal files
2824 // isn't what we had stored, we re-set our cached info and proceed
2825 // with opening the journal normally.
2826 //
/*
 * hfs_late_journal_init
 *
 * Locate the .journal_info_block and .journal files in the root directory,
 * validate (and if needed repair) the cached journal location/size, then
 * create or open the journal.  Returns 0 on success -- including the cases
 * where journaling is deliberately disabled -- or an errno on failure.
 */
static int
hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
{
	JournalInfoBlock *jibp;
	struct buf *jinfo_bp;		/* buffer holding the on-disk journal info block */
	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	int retval, write_jibp = 0, recreate_journal = 0;
	struct vnode *devvp;
	struct cat_attr jib_attr, jattr;
	struct cat_fork jib_fork, jfork;
	ExtendedVCB *vcb;
	u_int32_t fid;
	struct hfs_mount_args *args = _args;
	u_int32_t jib_flags;
	u_int64_t jib_offset;
	u_int64_t jib_size;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/* Extended mount arguments may disable journaling outright, or
	 * override the journal flags and transaction buffer size. */
	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
		if (args->journal_disable) {
			return 0;
		}

		arg_flags = args->journal_flags;
		arg_tbufsz = args->journal_tbuffer_size;
	}

	/* Without a usable .journal_info_block we cannot journal: clear the
	 * journaled attribute and allow an unjournaled mount. */
	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
	if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
		printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
			   fid ? jib_fork.cf_extents[0].startBlock : 0);
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}
	hfsmp->hfs_jnlinfoblkid = fid;

	// make sure the journal_info_block begins where we think it should.
	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
		printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
			   SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);

		vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
		vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
		recreate_journal = 1;
	}


	/* Read the journal info block from the device; the fs allocation
	 * block number is converted to device logical blocks. */
	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
	jinfo_bp = NULL;
	retval = (int)buf_meta_bread(devvp,
						(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
						((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
						hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
	if (retval) {
		if (jinfo_bp) {
			buf_brelse(jinfo_bp);
		}
		printf("hfs: can't read journal info block. disabling journaling.\n");
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}

	/* Journal info block fields are stored big-endian on disk. */
	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	jib_flags = SWAP_BE32(jibp->flags);
	jib_offset = SWAP_BE64(jibp->offset);
	jib_size = SWAP_BE64(jibp->size);

	/* Same sanity check for the .journal file itself. */
	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
	if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
		printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
			   fid ? jfork.cf_extents[0].startBlock : 0);
		buf_brelse(jinfo_bp);
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}
	hfsmp->hfs_jnlfileid = fid;

	// make sure the journal file begins where we think it should.
	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
		printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
			   (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);

		jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
		write_jibp = 1;
		recreate_journal = 1;
	}

	// check the size of the journal file.
	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
		printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
			   jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);

		jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
		write_jibp = 1;
		recreate_journal = 1;
	}

	if (jib_flags & kJIJournalInFSMask) {
		/* Journal lives inside this volume: make the offset device-relative. */
		hfsmp->jvp = hfsmp->hfs_devvp;
		jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
	} else {
		/* Journal lives on an external device. */
		const char *dev_name;
		int need_init = 0;

		dev_name = vnode_getname_printable(devvp);

		// since the journal is empty, just use any available external journal
		*((char *)&jibp->ext_jnl_uuid[0]) = '\0';

		// this fills in the uuid of the device we actually get
		hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
									  dev_name,
									  !(jib_flags & kJIJournalNeedInitMask),
									  (char *)&jibp->ext_jnl_uuid[0],
									  (char *)&jibp->machine_serial_num[0],
									  jib_size,
									  hfsmp->hfs_logical_block_size,
									  &need_init);
		if (hfsmp->jvp == NULL) {
			buf_brelse(jinfo_bp);
			vnode_putname_printable(dev_name);
			return EROFS;
		} else {
			if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
				strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
			}
		}
		jib_offset = 0;
		recreate_journal = 1;
		write_jibp = 1;
		if (need_init) {
			jib_flags |= kJIJournalNeedInitMask;
		}
		vnode_putname_printable(dev_name);
	}

	// save this off for the hack-y check in hfs_remove()
	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	hfsmp->jnl_size = jib_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
		// if the file system is read-only, check if the journal is empty.
		// if it is, then we can allow the mount. otherwise we have to
		// return failure.
		retval = journal_is_clean(hfsmp->jvp,
								  jib_offset,
								  jib_size,
								  devvp,
								  hfsmp->hfs_logical_block_size);

		hfsmp->jnl = NULL;

		buf_brelse(jinfo_bp);

		if (retval) {
			const char *name = vnode_getname_printable(devvp);
			printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
				   name);
			vnode_putname_printable(name);
		}

		return retval;
	}

	/* Create a fresh journal when the info block asks for initialization
	 * or any of the fixups above invalidated the old one. */
	if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
			   jib_offset, jib_size);
		hfsmp->jnl = journal_create(hfsmp->jvp,
									jib_offset,
									jib_size,
									devvp,
									hfsmp->hfs_logical_block_size,
									arg_flags,
									arg_tbufsz,
									hfs_sync_metadata, hfsmp->hfs_mp,
									hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

		// no need to start a transaction here... if this were to fail
		// we'd just re-init it on the next mount.
		jib_flags &= ~kJIJournalNeedInitMask;
		write_jibp = 1;

	} else {
		//
		// if we weren't the last person to mount this volume
		// then we need to throw away the journal because it
		// is likely that someone else mucked with the disk.
		// if the journal is empty this is no big deal. if the
		// disk is dirty this prevents us from replaying the
		// journal over top of changes that someone else made.
		//
		arg_flags |= JOURNAL_RESET;

		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
		//	   jib_offset,
		//	   jib_size, SWAP_BE32(vhp->blockSize));

		hfsmp->jnl = journal_open(hfsmp->jvp,
								  jib_offset,
								  jib_size,
								  devvp,
								  hfsmp->hfs_logical_block_size,
								  arg_flags,
								  arg_tbufsz,
								  hfs_sync_metadata, hfsmp->hfs_mp,
								  hfsmp->hfs_mp);
		if (hfsmp->jnl)
			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
	}


	/* Push any corrections back to the on-disk journal info block
	 * (buf_bwrite releases the buffer either way). */
	if (write_jibp) {
		jibp->flags = SWAP_BE32(jib_flags);
		jibp->offset = SWAP_BE64(jib_offset);
		jibp->size = SWAP_BE64(jib_size);

		buf_bwrite(jinfo_bp);
	} else {
		buf_brelse(jinfo_bp);
	}
	jinfo_bp = NULL;
	jibp = NULL;

	// if we expected the journal to be there and we couldn't
	// create it or open it then we have to bail out.
	if (hfsmp->jnl == NULL) {
		printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
		return EINVAL;
	}

	return 0;
}
3063
3064 /*
3065 * Calculate the allocation zone for metadata.
3066 *
3067 * This zone includes the following:
3068 * Allocation Bitmap file
3069 * Overflow Extents file
3070 * Journal file
3071 * Quota files
3072 * Clustered Hot files
3073 * Catalog file
3074 *
3075 * METADATA ALLOCATION ZONE
3076 * ____________________________________________________________________________
3077 * | | | | | | |
3078 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
3079 * |____|____|_____|_______________|______________________________|___________|
3080 *
3081 * <------------------------------- N * 128 MB ------------------------------->
3082 *
3083 */
3084 #define GIGABYTE (u_int64_t)(1024*1024*1024)
3085
3086 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
3087 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
3088
3089 /* Initialize the metadata zone.
3090 *
3091 * If the size of the volume is less than the minimum size for
3092 * metadata zone, metadata zone is disabled.
3093 *
3094 * If disable is true, disable metadata zone unconditionally.
3095 */
3096 void
3097 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
3098 {
3099 ExtendedVCB *vcb;
3100 u_int64_t fs_size;
3101 u_int64_t zonesize;
3102 u_int64_t temp;
3103 u_int64_t filesize;
3104 u_int32_t blk;
3105 int items, really_do_it=1;
3106
3107 vcb = HFSTOVCB(hfsmp);
3108 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3109
3110 /*
3111 * For volumes less than 10 GB, don't bother.
3112 */
3113 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3114 really_do_it = 0;
3115 }
3116
3117 /*
3118 * Skip non-journaled volumes as well.
3119 */
3120 if (hfsmp->jnl == NULL) {
3121 really_do_it = 0;
3122 }
3123
3124 /* If caller wants to disable metadata zone, do it */
3125 if (disable == true) {
3126 really_do_it = 0;
3127 }
3128
3129 /*
3130 * Start with space for the boot blocks and Volume Header.
3131 * 1536 = byte offset from start of volume to end of volume header:
3132 * 1024 bytes is the offset from the start of the volume to the
3133 * start of the volume header (defined by the volume format)
3134 * + 512 bytes (the size of the volume header).
3135 */
3136 zonesize = roundup(1536, hfsmp->blockSize);
3137
3138 /*
3139 * Add the on-disk size of allocation bitmap.
3140 */
3141 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3142
3143 /*
3144 * Add space for the Journal Info Block and Journal (if they're in
3145 * this file system).
3146 */
3147 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3148 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3149 }
3150
3151 /*
3152 * Add the existing size of the Extents Overflow B-tree.
3153 * (It rarely grows, so don't bother reserving additional room for it.)
3154 */
3155 zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
3156
3157 /*
3158 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3159 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3160 * When installing a full OS install onto a 20GB volume, we use
3161 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3162 * us with another 3 or 4 clumps worth before we need another extent.
3163 */
3164 if (hfsmp->hfs_attribute_cp) {
3165 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3166 }
3167
3168 /*
3169 * Leave room for 11 clumps of the Catalog B-tree.
3170 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3171 * When installing a full OS install onto a 20GB volume, we use
3172 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3173 * us with another 3 or 4 clumps worth before we need another extent.
3174 */
3175 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3176
3177 /*
3178 * Add space for hot file region.
3179 *
3180 * ...for now, use 5 MB per 1 GB (0.5 %)
3181 */
3182 filesize = (fs_size / 1024) * 5;
3183 if (filesize > HOTBAND_MAXIMUM_SIZE)
3184 filesize = HOTBAND_MAXIMUM_SIZE;
3185 else if (filesize < HOTBAND_MINIMUM_SIZE)
3186 filesize = HOTBAND_MINIMUM_SIZE;
3187 /*
3188 * Calculate user quota file requirements.
3189 */
3190 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3191 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3192 if (items < QF_MIN_USERS)
3193 items = QF_MIN_USERS;
3194 else if (items > QF_MAX_USERS)
3195 items = QF_MAX_USERS;
3196 if (!powerof2(items)) {
3197 int x = items;
3198 items = 4;
3199 while (x>>1 != 1) {
3200 x = x >> 1;
3201 items = items << 1;
3202 }
3203 }
3204 filesize += (items + 1) * sizeof(struct dqblk);
3205 /*
3206 * Calculate group quota file requirements.
3207 *
3208 */
3209 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3210 if (items < QF_MIN_GROUPS)
3211 items = QF_MIN_GROUPS;
3212 else if (items > QF_MAX_GROUPS)
3213 items = QF_MAX_GROUPS;
3214 if (!powerof2(items)) {
3215 int x = items;
3216 items = 4;
3217 while (x>>1 != 1) {
3218 x = x >> 1;
3219 items = items << 1;
3220 }
3221 }
3222 filesize += (items + 1) * sizeof(struct dqblk);
3223 }
3224 zonesize += filesize;
3225
3226 /*
3227 * Round up entire zone to a bitmap block's worth.
3228 * The extra space goes to the catalog file and hot file area.
3229 */
3230 temp = zonesize;
3231 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3232 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3233 /*
3234 * If doing the round up for hfs_min_alloc_start would push us past
3235 * allocLimit, then just reset it back to 0. Though using a value
3236 * bigger than allocLimit would not cause damage in the block allocator
3237 * code, this value could get stored in the volume header and make it out
3238 * to disk, making the volume header technically corrupt.
3239 */
3240 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3241 hfsmp->hfs_min_alloc_start = 0;
3242 }
3243
3244 if (really_do_it == 0) {
3245 /* If metadata zone needs to be disabled because the
3246 * volume was truncated, clear the bit and zero out
3247 * the values that are no longer needed.
3248 */
3249 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3250 /* Disable metadata zone */
3251 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3252
3253 /* Zero out mount point values that are not required */
3254 hfsmp->hfs_catalog_maxblks = 0;
3255 hfsmp->hfs_hotfile_maxblks = 0;
3256 hfsmp->hfs_hotfile_start = 0;
3257 hfsmp->hfs_hotfile_end = 0;
3258 hfsmp->hfs_hotfile_freeblks = 0;
3259 hfsmp->hfs_metazone_start = 0;
3260 hfsmp->hfs_metazone_end = 0;
3261 }
3262
3263 return;
3264 }
3265
3266 temp = zonesize - temp; /* temp has extra space */
3267 filesize += temp / 3;
3268 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3269
3270 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3271 hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
3272 } else {
3273 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3274 }
3275
3276 /* Convert to allocation blocks. */
3277 blk = zonesize / vcb->blockSize;
3278
3279 /* The default metadata zone location is at the start of volume. */
3280 hfsmp->hfs_metazone_start = 1;
3281 hfsmp->hfs_metazone_end = blk - 1;
3282
3283 /* The default hotfile area is at the end of the zone. */
3284 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3285 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3286 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3287 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3288 }
3289 else {
3290 hfsmp->hfs_hotfile_start = 0;
3291 hfsmp->hfs_hotfile_end = 0;
3292 hfsmp->hfs_hotfile_freeblks = 0;
3293 }
3294 #if DEBUG
3295 printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3296 printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3297 printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
3298 #endif
3299
3300 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3301 }
3302
3303
3304 static u_int32_t
3305 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3306 {
3307 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3308 int lockflags;
3309 int freeblocks;
3310
3311 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3312 //
3313 // This is only used at initialization time and on an ssd
3314 // we'll get the real info from the hotfile btree user
3315 // info
3316 //
3317 return 0;
3318 }
3319
3320 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3321 freeblocks = MetaZoneFreeBlocks(vcb);
3322 hfs_systemfile_unlock(hfsmp, lockflags);
3323
3324 /* Minus Extents overflow file reserve. */
3325 if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
3326 freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3327 }
3328
3329 /* Minus catalog file reserve. */
3330 if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
3331 freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3332 }
3333
3334 if (freeblocks < 0)
3335 freeblocks = 0;
3336
3337 // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
3338 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3339 }
3340
3341 /*
3342 * Determine if a file is a "virtual" metadata file.
3343 * This includes journal and quota files.
3344 */
3345 int
3346 hfs_virtualmetafile(struct cnode *cp)
3347 {
3348 const char * filename;
3349
3350
3351 if (cp->c_parentcnid != kHFSRootFolderID)
3352 return (0);
3353
3354 filename = (const char *)cp->c_desc.cd_nameptr;
3355 if (filename == NULL)
3356 return (0);
3357
3358 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3359 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3360 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3361 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3362 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3363 return (1);
3364
3365 return (0);
3366 }
3367
/* Serialize access to the syncer state; currently just the mount mutex. */
void hfs_syncer_lock(struct hfsmount *hfsmp)
{
	hfs_lock_mount(hfsmp);
}
3372
/* Release the syncer state lock taken by hfs_syncer_lock(). */
void hfs_syncer_unlock(struct hfsmount *hfsmp)
{
	hfs_unlock_mount(hfsmp);
}
3377
/* Sleep until hfs_syncer_wakeup() fires or the timeout 'ts' expires.
 * Caller holds the syncer lock (hfs_mutex); msleep drops it while
 * asleep and re-acquires it before returning. */
void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts)
{
	msleep(&hfsmp->hfs_syncer_thread, &hfsmp->hfs_mutex, PWAIT,
		   "hfs_syncer_wait", ts);
}
3383
/* Wake any thread blocked in hfs_syncer_wait() on this mount. */
void hfs_syncer_wakeup(struct hfsmount *hfsmp)
{
	wakeup(&hfsmp->hfs_syncer_thread);
}
3388
3389 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3390 {
3391 uint64_t deadline;
3392 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3393 return deadline;
3394 }
3395
3396 //
3397 // Fire off a timed callback to sync the disk if the
3398 // volume is on ejectable media.
3399 //
void hfs_sync_ejectable(struct hfsmount *hfsmp)
{
	// If we don't have a syncer or we get called by the syncer, just return
	if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
		|| current_thread() == hfsmp->hfs_syncer_thread) {
		return;
	}

	hfs_syncer_lock(hfsmp);

	/* Record when the oldest outstanding sync request came in. */
	if (!timerisset(&hfsmp->hfs_sync_req_oldest))
		microuptime(&hfsmp->hfs_sync_req_oldest);

	/* If hfs_unmount is running, it will clear the HFS_RUN_SYNCER
	   flag. Also, we don't want to queue again if there is a sync
	   outstanding. */
	if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
		|| hfsmp->hfs_syncer_thread) {
		hfs_syncer_unlock(hfsmp);
		return;
	}

	/* Non-NULL placeholder: claims the syncer slot under the lock before
	 * kernel_thread_start() below fills in the real thread_t. */
	hfsmp->hfs_syncer_thread = (void *)1;

	hfs_syncer_unlock(hfsmp);

	/* Spawn the syncer; drop the start-ref since we don't join it.
	 * (hfs_syncer presumably clears hfs_syncer_thread when done --
	 * its body is outside this file view.) */
	kernel_thread_start(hfs_syncer, hfsmp, &hfsmp->hfs_syncer_thread);
	thread_deallocate(hfsmp->hfs_syncer_thread);
}
3429
/*
 * hfs_start_transaction
 *
 * Begin (or nest into) a metadata transaction.  For the outermost
 * transaction this takes the global lock -- shared when a journal is
 * active, exclusive otherwise -- and bumps hfs_active_threads; nested
 * calls by the journal-owning thread skip that.  Returns 0 on success,
 * EBUSY/EROFS on the error paths below.  Pair with hfs_end_transaction().
 */
int
hfs_start_transaction(struct hfsmount *hfsmp)
{
	int ret = 0, unlock_on_err = 0;
	thread_t thread = current_thread();

#ifdef HFS_CHECK_LOCK_ORDER
	/*
	 * You cannot start a transaction while holding a system
	 * file lock. (unless the transaction is nested.)
	 */
	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
		if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
		}
		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
		}
		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
		}
	}
#endif /* HFS_CHECK_LOCK_ORDER */

again:

	if (hfsmp->jnl) {
		if (journal_owner(hfsmp->jnl) != thread) {
			/*
			 * The global lock should be held shared if journal is
			 * active to prevent disabling. If we're not the owner
			 * of the journal lock, verify that we're not already
			 * holding the global lock exclusive before moving on.
			 */
			if (hfsmp->hfs_global_lockowner == thread) {
				ret = EBUSY;
				goto out;
			}

			hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

			// Things could have changed
			if (!hfsmp->jnl) {
				hfs_unlock_global(hfsmp);
				goto again;
			}

			OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
			unlock_on_err = 1;
		}
	} else {
		// No journal
		if (hfsmp->hfs_global_lockowner != thread) {
			hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);

			// Things could have changed
			if (hfsmp->jnl) {
				hfs_unlock_global(hfsmp);
				goto again;
			}

			OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
			unlock_on_err = 1;
		}
	}

	/* If a downgrade to read-only mount is in progress, no other
	 * thread than the downgrade thread is allowed to modify
	 * the file system.
	 */
	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
		hfsmp->hfs_downgrading_thread != thread) {
		ret = EROFS;
		goto out;
	}

	if (hfsmp->jnl) {
		ret = journal_start_transaction(hfsmp->jnl);
	} else {
		ret = 0;
	}

	/* Only count the nesting level once the transaction actually started. */
	if (ret == 0)
		++hfsmp->hfs_transaction_nesting;

out:
	/* On failure, undo the global lock / thread count taken above. */
	if (ret != 0 && unlock_on_err) {
		hfs_unlock_global (hfsmp);
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
	}

	return ret;
}
3523
/*
 * hfs_end_transaction
 *
 * Close one level of transaction nesting opened by hfs_start_transaction().
 * For the outermost level this also drops the global lock, decrements
 * hfs_active_threads, and kicks off an ejectable-media sync.
 */
int
hfs_end_transaction(struct hfsmount *hfsmp)
{
	int ret;

	/* Caller must own the journal (if any) and be inside a transaction. */
	hfs_assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
	hfs_assert(hfsmp->hfs_transaction_nesting > 0);

	/* Flush a dirty volume header before the outermost transaction closes. */
	if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
		hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);

	bool need_unlock = !--hfsmp->hfs_transaction_nesting;

	if (hfsmp->jnl) {
		ret = journal_end_transaction(hfsmp->jnl);
	} else {
		ret = 0;
	}

	if (need_unlock) {
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
		hfs_unlock_global (hfsmp);
		hfs_sync_ejectable(hfsmp);
	}

	return ret;
}
3551
3552
/*
 * Acquire the journal lock, if a journal exists.  The global lock is
 * held shared around the check so hfsmp->jnl cannot be torn down while
 * we peek at it.
 */
void
hfs_journal_lock(struct hfsmount *hfsmp)
{
	/* Only peek at hfsmp->jnl while holding the global lock */
	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
	if (hfsmp->jnl) {
		journal_lock(hfsmp->jnl);
	}
	hfs_unlock_global (hfsmp);
}
3563
/* Release the journal lock taken by hfs_journal_lock(), if a journal exists. */
void
hfs_journal_unlock(struct hfsmount *hfsmp)
{
	/* Only peek at hfsmp->jnl while holding the global lock */
	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
	if (hfsmp->jnl) {
		journal_unlock(hfsmp->jnl);
	}
	hfs_unlock_global (hfsmp);
}
3574
3575 /*
3576 * Flush the contents of the journal to the disk.
3577 *
3578 * - HFS_FLUSH_JOURNAL
3579 * Wait to write in-memory journal to the disk consistently.
3580 * This means that the journal still contains uncommitted
3581 * transactions and the file system metadata blocks in
3582 * the journal transactions might be written asynchronously
3583 * to the disk. But there is no guarantee that they are
3584 * written to the disk before returning to the caller.
3585 * Note that this option is sufficient for file system
3586 * data integrity as it guarantees consistent journal
3587 * content on the disk.
3588 *
3589 * - HFS_FLUSH_JOURNAL_META
3590 * Wait to write in-memory journal to the disk
3591 * consistently, and also wait to write all asynchronous
3592 * metadata blocks to its corresponding locations
3593 * consistently on the disk. This is overkill in normal
3594 * scenarios but is useful whenever the metadata blocks
3595 * are required to be consistent on-disk instead of
 * just the journal being consistent; like before live
3597 * verification and live volume resizing. The update of the
3598 * metadata doesn't include a barrier of track cache flush.
3599 *
3600 * - HFS_FLUSH_FULL
3601 * HFS_FLUSH_JOURNAL + force a track cache flush to media
3602 *
3603 * - HFS_FLUSH_CACHE
3604 * Force a track cache flush to media.
3605 *
3606 * - HFS_FLUSH_BARRIER
3607 * Barrier-only flush to ensure write order
3608 *
3609 */
errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
{
	errno_t error = 0;
	int options = 0;
	dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };

	switch (mode) {
		case HFS_FLUSH_JOURNAL_META:
			// wait for journal, metadata blocks and previous async flush to finish
			SET(options, JOURNAL_WAIT_FOR_IO);

			// no break -- falls through to the journal flush below

		case HFS_FLUSH_JOURNAL:
		case HFS_FLUSH_JOURNAL_BARRIER:
		case HFS_FLUSH_FULL:

			/* Without barrier support, a journal barrier degrades
			 * to a full cache flush. */
			if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
			    !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
				mode = HFS_FLUSH_FULL;

			if (mode == HFS_FLUSH_FULL)
				SET(options, JOURNAL_FLUSH_FULL);

			/* Only peek at hfsmp->jnl while holding the global lock */
			hfs_lock_global (hfsmp, HFS_SHARED_LOCK);

			if (hfsmp->jnl)
				error = journal_flush(hfsmp->jnl, options);

			hfs_unlock_global (hfsmp);

			/*
			 * This may result in a double barrier as
			 * journal_flush may have issued a barrier itself
			 */
			if (mode == HFS_FLUSH_JOURNAL_BARRIER)
				error = VNOP_IOCTL(hfsmp->hfs_devvp,
				                   DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
				                   FWRITE, NULL);

			break;

		case HFS_FLUSH_CACHE:
			// Do a full sync
			sync_req.options = 0;

			// no break -- falls through to issue the DKIOCSYNCHRONIZE

		case HFS_FLUSH_BARRIER:
			// If a barrier-only flush is not supported, fall back to a full flush.
			if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
				sync_req.options = 0;

			error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
			                   FWRITE, NULL);
			break;

		default:
			error = EINVAL;
	}

	return error;
}
3674
3675 /*
3676 * hfs_erase_unused_nodes
3677 *
 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3679 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3680 * zeroes to the unused nodes.
3681 *
3682 * How do we detect when a volume needs this repair? We can't always be
3683 * certain. If a volume was created after a certain date, then it may have
3684 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3685 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3686 * that means that the entire first clump must have been written to, which means
3687 * there shouldn't be unused and unwritten nodes in that first clump, and this
3688 * repair is not needed.
3689 *
3690 * We have defined a bit in the Volume Header's attributes to indicate when the
3691 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
3692 * As will fsck_hfs when it repairs the unused nodes.
3693 */
/* See the block comment above for background on this repair.
 * Returns 0 on success or if no repair is needed; otherwise the error
 * from hfs_start_transaction()/BTZeroUnusedNodes(). */
int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
{
	int result;
	struct filefork *catalog;
	int lockflags;

	if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
	{
		/* This volume has already been checked and repaired. */
		return 0;
	}

	if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
	{
		/* This volume is too old to have had the problem. */
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
		return 0;
	}

	catalog = hfsmp->hfs_catalog_cp->c_datafork;
	if (catalog->ff_size > catalog->ff_clumpsize)
	{
		/* The entire first clump must have been in use at some point. */
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
		return 0;
	}

	/*
	 * If we get here, we need to zero out those unused nodes.
	 *
	 * We start a transaction and lock the catalog since we're going to be
	 * making on-disk changes.  But note that BTZeroUnusedNodes doesn't actually
	 * do its writing via the journal, because that would be too much I/O
	 * to fit in a transaction, and it's a pain to break it up into multiple
	 * transactions.  (It behaves more like growing a B-tree would.)
	 */
	printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
	result = hfs_start_transaction(hfsmp);
	if (result)
		goto done;
	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
	result = BTZeroUnusedNodes(catalog);
	/* Wait for the non-journaled zeroing I/O to reach the disk. */
	vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);
	if (result == 0)
		hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
	printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);

done:
	return result;
}
3746
3747
/*
 * check_for_dataless_file
 *
 * If 'vp' is a dataless (decmpfs DATALESS_CMPFS_TYPE) file, ask the
 * namespace handler to materialize it before the caller's operation
 * ('op_type') proceeds.  Returns 0 when the file is not dataless, was
 * materialized, or the handler timed out; EINTR on signal; EBADF when
 * the handler ran but the file is still dataless.
 */
int
check_for_dataless_file(struct vnode *vp, uint64_t op_type)
{
	int error;

	if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || decmpfs_cnode_cmp_type(VTOCMP(vp)) != DATALESS_CMPFS_TYPE) {
		// there's nothing to do, it's not dataless
		return 0;
	}

	/* Swap files are special; ignore them */
	if (vnode_isswap(vp)) {
		return 0;
	}

	// printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
	error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
	if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
		/* A write op deadlocking against the handler is tolerated. */
		error = 0;
	} else if (error) {
		if (error == EAGAIN) {
			printf("hfs: dataless: timed out waiting for namespace handler...\n");
			// XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
			return 0;
		} else if (error == EINTR) {
			// printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
			return EINTR;
		}
	} else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
		//
		// if we're here, the dataless bit is still set on the file
		// which means it didn't get handled. we return an error
		// but it's presently ignored by all callers of this function.
		//
		// XXXdbg - EDATANOTPRESENT is what we really need...
		//
		return EBADF;
	}

	return error;
}
3789
3790
3791 //
3792 // NOTE: this function takes care of starting a transaction and
3793 // acquiring the systemfile lock so that it can call
3794 // cat_update().
3795 //
3796 // NOTE: do NOT hold and cnode locks while calling this function
3797 // to avoid deadlocks (because we take a lock on the root
3798 // cnode)
3799 //
3800 int
3801 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3802 {
3803 struct vnode *rvp;
3804 struct cnode *cp;
3805 int error;
3806
3807 error = hfs_vfs_root(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3808 if (error) {
3809 return error;
3810 }
3811
3812 cp = VTOC(rvp);
3813 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3814 return error;
3815 }
3816 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3817
3818 int lockflags;
3819 if ((error = hfs_start_transaction(hfsmp)) != 0) {
3820 return error;
3821 }
3822 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3823
3824 if (extinfo->document_id == 0) {
3825 // initialize this to start at 3 (one greater than the root-dir id)
3826 extinfo->document_id = 3;
3827 }
3828
3829 *docid = extinfo->document_id++;
3830
3831 // mark the root cnode dirty
3832 cp->c_flag |= C_MODIFIED;
3833 hfs_update(cp->c_vp, 0);
3834
3835 hfs_systemfile_unlock (hfsmp, lockflags);
3836 (void) hfs_end_transaction(hfsmp);
3837
3838 (void) hfs_unlock(cp);
3839
3840 vnode_put(rvp);
3841 rvp = NULL;
3842
3843 return 0;
3844 }
3845
3846
3847 /*
3848 * Return information about number of file system allocation blocks
3849 * taken by metadata on a volume.
3850 *
3851 * This function populates struct hfsinfo_metadata with allocation blocks
3852 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3853 * journal file, and sum of all of the above.
3854 */
3855 int
3856 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3857 {
3858 int lockflags = 0;
3859 int ret_lockflags = 0;
3860
3861 /* Zero out the output buffer */
3862 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3863
3864 /*
3865 * Getting number of allocation blocks for all btrees
3866 * should be a quick operation, so we grab locks for
3867 * all of them at the same time
3868 */
3869 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3870 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3871 /*
3872 * Make sure that we were able to acquire all locks requested
3873 * to protect us against conditions like unmount in progress.
3874 */
3875 if ((lockflags & ret_lockflags) != lockflags) {
3876 /* Release any locks that were acquired */
3877 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3878 return EPERM;
3879 }
3880
3881 /* Get information about all the btrees */
3882 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3883 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3884 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3885 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3886
3887 /* Done with btrees, give up the locks */
3888 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3889
3890 /* Get information about journal file */
3891 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3892
3893 /* Calculate total number of metadata blocks */
3894 hinfo->total = hinfo->extents + hinfo->catalog +
3895 hinfo->allocation + hinfo->attribute +
3896 hinfo->journal;
3897
3898 return 0;
3899 }
3900
/*
 * vnode_iterate() callback used by hfs_freeze(): block until all pending
 * writes on the given vnode have drained.  The callback argument is unused.
 * Always returns 0 so iteration continues over every vnode.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");

	return 0;
}
3908
/*
 * Freeze the volume.
 *
 * Waits for any in-progress freeze to finish, stops new syncers and waits
 * for in-flight ones, flushes dirty vnode data and the journal, then waits
 * for writes on the system files and the device vnode before marking the
 * mount HFS_FROZEN and recording the freezing process.
 *
 * Returns 0 on success, or EINTR if interrupted while waiting (in which
 * case any partial state transition is undone via hfs_thaw_locked()).
 */
int hfs_freeze(struct hfsmount *hfsmp)
{
	// First make sure some other process isn't freezing
	hfs_lock_mount(hfsmp);
	while (hfsmp->hfs_freeze_state != HFS_THAWED) {
		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
		           PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
			hfs_unlock_mount(hfsmp);
			return EINTR;
		}
	}

	// Stop new syncers from starting
	hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;

	// Now wait for all syncers to finish
	while (hfsmp->hfs_syncers) {
		if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
		           PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
			// Interrupted: revert to HFS_THAWED and wake other waiters
			hfs_thaw_locked(hfsmp);
			hfs_unlock_mount(hfsmp);
			return EINTR;
		}
	}
	hfs_unlock_mount(hfsmp);

	// flush things before we get started to try and prevent
	// dirty data from being paged out while we're frozen.
	// note: we can't do this once we're in the freezing state because
	// other threads will need to take the global lock
	vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);

	// Block everything in hfs_lock_global now
	hfs_lock_mount(hfsmp);
	hfsmp->hfs_freeze_state = HFS_FREEZING;
	hfsmp->hfs_freezing_thread = current_thread();
	hfs_unlock_mount(hfsmp);

	/* Take the exclusive lock to flush out anything else that
	   might have the global lock at the moment and also so we
	   can flush the journal. */
	hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
	journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
	hfs_unlock_global(hfsmp);

	// don't need to iterate on all vnodes, we just need to
	// wait for writes to the system files and the device vnode
	//
	// Now that journal flush waits for all metadata blocks to
	// be written out, waiting for btree writes is probably no
	// longer required.
	if (HFSTOVCB(hfsmp)->extentsRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
	if (HFSTOVCB(hfsmp)->catalogRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
	if (HFSTOVCB(hfsmp)->allocationsRefNum)
		vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
	if (hfsmp->hfs_attribute_vp)
		vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
	vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");

	// We're done, mark frozen
	hfs_lock_mount(hfsmp);
	hfsmp->hfs_freeze_state = HFS_FROZEN;
	hfsmp->hfs_freezing_proc = current_proc();
	hfs_unlock_mount(hfsmp);

	return 0;
}
3978
3979 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3980 {
3981 hfs_lock_mount(hfsmp);
3982
3983 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3984 hfs_unlock_mount(hfsmp);
3985 return EINVAL;
3986 }
3987 if (process && hfsmp->hfs_freezing_proc != process) {
3988 hfs_unlock_mount(hfsmp);
3989 return EPERM;
3990 }
3991
3992 hfs_thaw_locked(hfsmp);
3993
3994 hfs_unlock_mount(hfsmp);
3995
3996 return 0;
3997 }
3998
3999 static void hfs_thaw_locked(struct hfsmount *hfsmp)
4000 {
4001 hfsmp->hfs_freezing_proc = NULL;
4002 hfsmp->hfs_freeze_state = HFS_THAWED;
4003
4004 wakeup(&hfsmp->hfs_freeze_state);
4005 }
4006
4007 uintptr_t obfuscate_addr(void *addr)
4008 {
4009 vm_offset_t new_addr;
4010 vm_kernel_addrperm_external((vm_offset_t)addr, &new_addr);
4011 return new_addr;
4012 }
4013
4014 #if CONFIG_HFS_STD
4015 /*
4016 * Convert HFS encoded string into UTF-8
4017 *
4018 * Unicode output is fully decomposed
4019 * '/' chars are converted to ':'
4020 */
4021 int
4022 hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
4023 {
4024 int error;
4025 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4026 ItemCount uniCount;
4027 size_t utf8len;
4028 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
4029 u_int8_t pascal_length = 0;
4030
4031 /*
4032 * Validate the length of the Pascal-style string before passing it
4033 * down to the decoding engine.
4034 */
4035 pascal_length = *((const u_int8_t*)(hfs_str));
4036 if (pascal_length > 31) {
4037 /* invalid string; longer than 31 bytes */
4038 error = EINVAL;
4039 return error;
4040 }
4041
4042 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
4043
4044 if (uniCount == 0)
4045 error = EINVAL;
4046
4047 if (error == 0) {
4048 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
4049 if (error == ENAMETOOLONG)
4050 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
4051 else
4052 *actualDstLen = utf8len;
4053 }
4054
4055 return error;
4056 }
4057
4058 /*
4059 * Convert UTF-8 string into HFS encoding
4060 *
4061 * ':' chars are converted to '/'
4062 * Assumes input represents fully decomposed Unicode
4063 */
4064 int
4065 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
4066 {
4067 int error;
4068 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4069 size_t ucslen;
4070
4071 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
4072 if (error == 0)
4073 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
4074
4075 return error;
4076 }
4077
4078 /*
4079 * Convert Unicode string into HFS encoding
4080 *
4081 * ':' chars are converted to '/'
4082 * Assumes input represents fully decomposed Unicode
4083 */
4084 int
4085 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
4086 {
4087 int error;
4088 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
4089
4090 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
4091 if (error && retry) {
4092 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
4093 }
4094 return error;
4095 }
4096
4097 #endif // CONFIG_HFS_STD
4098
4099 static uint64_t hfs_allocated __attribute__((aligned(8)));
4100
4101 #if HFS_MALLOC_DEBUG
4102
4103 #warning HFS_MALLOC_DEBUG is on
4104
4105 #include <libkern/OSDebug.h>
4106 #include "hfs_alloc_trace.h"
4107
/*
 * Trailer appended to every HFS_MALLOC_DEBUG allocation.  It is placed
 * immediately after the caller-visible region (see hfs_malloc/hfs_free)
 * and carries a magic value, the requested size, and an optional
 * backtrace used by the allocation-tracing sysctls.
 */
struct alloc_debug_header {
	uint32_t magic;		// HFS_ALLOC_MAGIC while live, HFS_ALLOC_DEAD after free
	uint32_t size;		// size passed to hfs_malloc (excludes this header)
	uint64_t sequence;	// allocation-order stamp, assigned only when tracing is on
	LIST_ENTRY(alloc_debug_header) chain;	// linkage on hfs_alloc_list; le_prev is NULL when untracked
	void *backtrace[HFS_ALLOC_BACKTRACE_LEN];	// allocating call stack (tracing only)
};

enum {
	HFS_ALLOC_MAGIC = 0x68667361, // "hfsa"
	HFS_ALLOC_DEAD = 0x68667364, // "hfsd"
};
4120
4121 static LIST_HEAD(, alloc_debug_header) hfs_alloc_list;
4122 static lck_mtx_t *hfs_alloc_mtx;
4123 static int hfs_alloc_tracing;
4124 static uint64_t hfs_alloc_sequence;
4125
/*
 * Enable tracing of hfs_malloc allocations.  Lazily creates the trace
 * mutex and list head on first use, then publishes the enable flag via
 * OSCompareAndSwap so the flag store is ordered after initialization.
 */
void hfs_alloc_trace_enable(void)
{
	if (hfs_alloc_tracing)
		return;

	// Not thread-safe, but this is debug so who cares
	extern lck_grp_t *hfs_mutex_group;
	extern lck_attr_t *hfs_lock_attr;

	// One-time setup; the mutex and list are never torn down afterwards.
	if (!hfs_alloc_mtx) {
		hfs_alloc_mtx = lck_mtx_alloc_init(hfs_mutex_group, hfs_lock_attr);
		LIST_INIT(&hfs_alloc_list);
	}

	// Using OSCompareAndSwap in lieu of a barrier
	OSCompareAndSwap(hfs_alloc_tracing, true, &hfs_alloc_tracing);
}
4143
/*
 * Disable allocation tracing and empty the tracked-allocation list.
 * Each header's le_prev is cleared first so hfs_free() can tell the
 * entry is no longer on the list and will skip LIST_REMOVE for it.
 */
void hfs_alloc_trace_disable(void)
{
	if (!hfs_alloc_tracing)
		return;

	// Stop new entries before draining the list.
	hfs_alloc_tracing = false;

	lck_mtx_lock_spin(hfs_alloc_mtx);

	struct alloc_debug_header *hdr;
	LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
		hdr->chain.le_prev = NULL;
	}
	LIST_INIT(&hfs_alloc_list);

	lck_mtx_unlock(hfs_alloc_mtx);
}
4161
4162 static int hfs_handle_alloc_tracing SYSCTL_HANDLER_ARGS
4163 {
4164 int v = hfs_alloc_tracing;
4165
4166 int err = sysctl_handle_int(oidp, &v, 0, req);
4167 if (err || req->newptr == USER_ADDR_NULL || v == hfs_alloc_tracing)
4168 return err;
4169
4170 if (v)
4171 hfs_alloc_trace_enable();
4172 else
4173 hfs_alloc_trace_disable();
4174
4175 return 0;
4176 }
4177
4178 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_tracing,
4179 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0,
4180 hfs_handle_alloc_tracing, "I", "Allocation tracing")
4181
4182 static int hfs_handle_alloc_trace_info SYSCTL_HANDLER_ARGS
4183 {
4184 if (!hfs_alloc_tracing) {
4185 struct hfs_alloc_trace_info info = {};
4186 return sysctl_handle_opaque(oidp, &info, sizeof(info), req);
4187 }
4188
4189 const int size = 128 * 1024;
4190 struct hfs_alloc_trace_info *info = kalloc(size);
4191
4192 const int max_entries = ((size - sizeof(*info))
4193 / sizeof(struct hfs_alloc_info_entry));
4194
4195 info->entry_count = 0;
4196 info->more = false;
4197
4198 lck_mtx_lock_spin(hfs_alloc_mtx);
4199
4200 struct alloc_debug_header *hdr;
4201 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4202 if (info->entry_count == max_entries) {
4203 info->more = true;
4204 break;
4205 }
4206 vm_offset_t o;
4207 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4208 info->entries[info->entry_count].ptr = o;
4209 info->entries[info->entry_count].size = hdr->size;
4210 info->entries[info->entry_count].sequence = hdr->sequence;
4211 for (int i = 0; i < HFS_ALLOC_BACKTRACE_LEN; ++i) {
4212 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[i], &o);
4213 info->entries[info->entry_count].backtrace[i] = o;
4214 }
4215 ++info->entry_count;
4216 }
4217
4218 lck_mtx_unlock(hfs_alloc_mtx);
4219
4220 int err = sysctl_handle_opaque(oidp, info,
4221 sizeof(*info) + info->entry_count
4222 * sizeof(struct hfs_alloc_info_entry),
4223 req);
4224
4225 kfree(info, size);
4226
4227 return err;
4228 }
4229
4230 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_trace_info,
4231 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0,
4232 hfs_handle_alloc_trace_info, "-", "Allocation trace info")
4233
4234 bool hfs_dump_allocations(void)
4235 {
4236 if (!hfs_allocated)
4237 return false;
4238
4239 lck_mtx_lock(hfs_alloc_mtx);
4240
4241 struct alloc_debug_header *hdr;
4242 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4243 vm_offset_t o;
4244 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4245 printf(" -- 0x%lx:%llu <%u> --\n", o, hdr->sequence, hdr->size);
4246 for (int j = 0; j < HFS_ALLOC_BACKTRACE_LEN && hdr->backtrace[j]; ++j) {
4247 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[j], &o);
4248 printf("0x%lx\n", o);
4249 }
4250 }
4251
4252 lck_mtx_unlock(hfs_alloc_mtx);
4253
4254 return true;
4255 }
4256
4257 #endif
4258
4259 HFS_SYSCTL(QUAD, _vfs_generic_hfs, OID_AUTO, allocated,
4260 CTLFLAG_RD | CTLFLAG_LOCKED, &hfs_allocated, "Memory allocated")
4261
/*
 * Allocate `size` bytes for HFS and bump the global hfs_allocated
 * accounting counter.  Under HFS_MALLOC_DEBUG a debug trailer is
 * appended after the user-visible region and, when tracing is enabled,
 * linked onto hfs_alloc_list with a captured backtrace.
 */
void *hfs_malloc(size_t size)
{
#if HFS_MALLOC_DEBUG
	// hdr->size is a uint32_t, so the request must fit in 32 bits.
	hfs_assert(size <= 0xffffffff);

	struct alloc_debug_header *hdr;

	void *ptr;
	ptr = kalloc(size + sizeof(*hdr));

	// Trailer lives immediately after the caller-visible region.
	// NOTE(review): void-pointer arithmetic is a GNU extension, and the
	// kalloc() result is written through without a NULL check -- confirm
	// the kalloc variant in use cannot fail here.
	hdr = ptr + size;

	hdr->magic = HFS_ALLOC_MAGIC;
	hdr->size = size;

	if (hfs_alloc_tracing) {
		OSBacktrace(hdr->backtrace, HFS_ALLOC_BACKTRACE_LEN);
		lck_mtx_lock_spin(hfs_alloc_mtx);
		LIST_INSERT_HEAD(&hfs_alloc_list, hdr, chain);
		hdr->sequence = ++hfs_alloc_sequence;
		lck_mtx_unlock(hfs_alloc_mtx);
	} else
		hdr->chain.le_prev = NULL;	// marks the entry as not on the trace list
#else
	void *ptr;
	ptr = kalloc(size);
#endif

	OSAddAtomic64(size, &hfs_allocated);

	return ptr;
}
4294
/*
 * Free memory obtained from hfs_malloc().  `size` must be the exact size
 * originally requested: it is used for the accounting decrement and,
 * under HFS_MALLOC_DEBUG, to locate the debug trailer.  Freeing NULL is
 * a no-op.
 */
void hfs_free(void *ptr, size_t size)
{
	if (!ptr)
		return;

	OSAddAtomic64(-(int64_t)size, &hfs_allocated);

#if HFS_MALLOC_DEBUG
	// The debug trailer sits immediately after the user region.
	struct alloc_debug_header *hdr = ptr + size;

	// Catch double frees and size mismatches before poisoning the magic.
	hfs_assert(hdr->magic == HFS_ALLOC_MAGIC);
	hfs_assert(hdr->size == size);

	hdr->magic = HFS_ALLOC_DEAD;

	// Only entries placed on the trace list have a non-NULL le_prev.
	if (hdr->chain.le_prev) {
		lck_mtx_lock_spin(hfs_alloc_mtx);
		LIST_REMOVE(hdr, chain);
		lck_mtx_unlock(hfs_alloc_mtx);
	}

	kfree(ptr, size + sizeof(*hdr));
#else
	kfree(ptr, size);
#endif
}
4321
4322 void *hfs_mallocz(size_t size)
4323 {
4324 void *ptr = hfs_malloc(size);
4325 bzero(ptr, size);
4326 return ptr;
4327 }
4328
4329 // -- Zone allocator-related structures and routines --
4330
/*
 * Static descriptors for each HFS zone consumed by hfs_init_zones():
 * zone kind, element size, zone name, and whether the zone should be
 * flagged Z_NOENCRYPT.
 */
hfs_zone_entry_t hfs_zone_entries[HFS_NUM_ZONES] = {
	{ HFS_CNODE_ZONE, sizeof(struct cnode), "HFS node", true },
	{ HFS_FILEFORK_ZONE, sizeof(struct filefork), "HFS fork", true },
	{ HFS_DIRHINT_ZONE, sizeof(struct directoryhint), "HFS dirhint", true }
};
4336
4337 hfs_zone_t hfs_zones[HFS_NUM_ZONES];
4338
4339 void hfs_init_zones(void) {
4340 for (int i = 0; i < HFS_NUM_ZONES; i++) {
4341 hfs_zones[i].hz_zone = zinit(hfs_zone_entries[i].hze_elem_size, 1024 * 1024, PAGE_SIZE, hfs_zone_entries[i].hze_name);
4342 hfs_zones[i].hz_elem_size = hfs_zone_entries[i].hze_elem_size;
4343
4344 zone_change(hfs_zones[i].hz_zone, Z_CALLERACCT, false);
4345 if (hfs_zone_entries[i].hze_noencrypt)
4346 zone_change(hfs_zones[i].hz_zone, Z_NOENCRYPT, true);
4347 }
4348 }
4349
4350 void *hfs_zalloc(hfs_zone_kind_t zone)
4351 {
4352 OSAddAtomic64(hfs_zones[zone].hz_elem_size, &hfs_allocated);
4353
4354 return zalloc(hfs_zones[zone].hz_zone);
4355 }
4356
4357 void hfs_zfree(void *ptr, hfs_zone_kind_t zone)
4358 {
4359 OSAddAtomic64(-(int64_t)hfs_zones[zone].hz_elem_size, &hfs_allocated);
4360
4361 zfree(hfs_zones[zone].hz_zone, ptr);
4362 }
4363
4364 struct hfs_sysctl_chain *sysctl_list;
4365
4366 void hfs_sysctl_register(void)
4367 {
4368 struct hfs_sysctl_chain *e = sysctl_list;
4369 while (e) {
4370 sysctl_register_oid(e->oid);
4371 e = e->next;
4372 }
4373 }
4374
4375 void hfs_sysctl_unregister(void)
4376 {
4377 struct hfs_sysctl_chain *e = sysctl_list;
4378 while (e) {
4379 sysctl_unregister_oid(e->oid);
4380 e = e->next;
4381 }
4382 }
4383
/*
 * Out-of-line assertion-failure handler: report the failing file, line
 * and expression via Assert().  Assert() is expected not to return;
 * __builtin_unreachable() tells the compiler control never reaches the
 * end of this function.
 */
void hfs_assert_fail(const char *file, unsigned line, const char *expr)
{
	Assert(file, line, expr);
	__builtin_unreachable();
}