bsd/hfs/hfs_vfsutils.c

   1 /*
   2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*      @(#)hfs_vfsutils.c      4.0
  29 *
  30 *       (c) 1997-2002 Apple Computer, Inc.  All Rights Reserved
  31 *
  32 *       hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
  33 *
  34 */
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/kernel.h>
  38 #include <sys/malloc.h>
  39 #include <sys/stat.h>
  40 #include <sys/mount.h>
  41 #include <sys/mount_internal.h>
  42 #include <sys/buf.h>
  43 #include <sys/buf_internal.h>
  44 #include <sys/ubc.h>
  45 #include <sys/unistd.h>
  46 #include <sys/utfconv.h>
  47 #include <sys/kauth.h>
  48 #include <sys/fcntl.h>
  49 #include <sys/fsctl.h>
  50 #include <sys/vnode_internal.h>
  51 #include <kern/clock.h>
  52
  53 #include <libkern/OSAtomic.h>
  54
  55 /* for parsing boot-args */
  56 #include <pexpert/pexpert.h>
  57
  58 #if CONFIG_PROTECT
  59 #include <sys/cprotect.h>
  60 #endif
  61
  62 #include "hfs.h"
  63 #include "hfs_catalog.h"
  64 #include "hfs_dbg.h"
  65 #include "hfs_mount.h"
  66 #include "hfs_endian.h"
  67 #include "hfs_cnode.h"
  68 #include "hfs_fsctl.h"
  69
  70 #include "hfscommon/headers/FileMgrInternal.h"
  71 #include "hfscommon/headers/BTreesInternal.h"
  72 #include "hfscommon/headers/HFSUnicodeWrappers.h"
  73
  74 static void ReleaseMetaFileVNode(struct vnode *vp);
  75 static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
  76
  77 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
  78
  79 #define HFS_MOUNT_DEBUG 1
  80
  81
  82 //*******************************************************************************
  83 // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
  84 //       hence are not in the right byte order on little endian machines. It is
  85 //       the responsibility of the finder and other clients to swap the data.
  86 //*******************************************************************************
  87
  88 //*******************************************************************************
  89 //      Routine:        hfs_MountHFSVolume
  90 //
  91 //
  92 //*******************************************************************************
  93 unsigned char hfs_catname[] = "Catalog B-tree";
  94 unsigned char hfs_extname[] = "Extents B-tree";
  95 unsigned char hfs_vbmname[] = "Volume Bitmap";
  96 unsigned char hfs_attrname[] = "Attribute B-tree";
  97 unsigned char hfs_startupname[] = "Startup File";
  98
  99 #if CONFIG_HFS_STD
 100 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 101                 __unused struct proc *p)
 102 {
 103         ExtendedVCB *vcb = HFSTOVCB(hfsmp);
 104         int error;
 105         ByteCount utf8chars;
 106         struct cat_desc cndesc;
 107         struct cat_attr cnattr;
 108         struct cat_fork fork;
 109         int newvnode_flags = 0;
 110
 111         /* Block size must be a multiple of 512 */
 112         if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
 113             (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
 114                 return (EINVAL);
 115
 116         /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
 117         if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
 118             ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
 119                 return (EINVAL);
 120         }
 121         hfsmp->hfs_flags |= HFS_STANDARD;
 122         /*
 123          * The MDB seems OK: transfer info from it into VCB
 124          * Note - the VCB starts out clear (all zeros)
 125          *
 126          */
 127         vcb->vcbSigWord         = SWAP_BE16 (mdb->drSigWord);
 128         vcb->hfs_itime          = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
 129         vcb->localCreateDate    = SWAP_BE32 (mdb->drCrDate);
 130         vcb->vcbLsMod           = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
 131         vcb->vcbAtrb            = SWAP_BE16 (mdb->drAtrb);
 132         vcb->vcbNmFls           = SWAP_BE16 (mdb->drNmFls);
 133         vcb->vcbVBMSt           = SWAP_BE16 (mdb->drVBMSt);
 134         vcb->nextAllocation     = SWAP_BE16 (mdb->drAllocPtr);
 135         vcb->totalBlocks        = SWAP_BE16 (mdb->drNmAlBlks);
 136         vcb->allocLimit         = vcb->totalBlocks;
 137         vcb->blockSize          = SWAP_BE32 (mdb->drAlBlkSiz);
 138         vcb->vcbClpSiz          = SWAP_BE32 (mdb->drClpSiz);
 139         vcb->vcbAlBlSt          = SWAP_BE16 (mdb->drAlBlSt);
 140         vcb->vcbNxtCNID         = SWAP_BE32 (mdb->drNxtCNID);
 141         vcb->freeBlocks         = SWAP_BE16 (mdb->drFreeBks);
 142         vcb->vcbVolBkUp         = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
 143         vcb->vcbWrCnt           = SWAP_BE32 (mdb->drWrCnt);
 144         vcb->vcbNmRtDirs        = SWAP_BE16 (mdb->drNmRtDirs);
 145         vcb->vcbFilCnt          = SWAP_BE32 (mdb->drFilCnt);
 146         vcb->vcbDirCnt          = SWAP_BE32 (mdb->drDirCnt);
 147         bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
 148         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 149                 vcb->vcbWrCnt++;        /* Compensate for write of MDB on last flush */
 150
 151         /* convert hfs encoded name into UTF-8 string */
 152         error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 153         /*
 154          * When an HFS name cannot be encoded with the current
 155          * volume encoding we use MacRoman as a fallback.
 156          */
 157         if (error || (utf8chars == 0)) {
 158                 error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 159                 /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
 160                 if (error) {
 161                         goto MtVolErr;
 162                 }
 163         }
 164
 165         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 166         vcb->vcbVBMIOSize = kHFSBlockSize;
 167
 168         hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 169                                                   hfsmp->hfs_logical_block_count);
 170
 171         bzero(&cndesc, sizeof(cndesc));
 172         cndesc.cd_parentcnid = kHFSRootParentID;
 173         cndesc.cd_flags |= CD_ISMETA;
 174         bzero(&cnattr, sizeof(cnattr));
 175         cnattr.ca_linkcount = 1;
 176         cnattr.ca_mode = S_IFREG;
 177         bzero(&fork, sizeof(fork));
 178
 179         /*
 180          * Set up Extents B-tree vnode
 181          */
 182         cndesc.cd_nameptr = hfs_extname;
 183         cndesc.cd_namelen = strlen((char *)hfs_extname);
 184         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 185         fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
 186         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 187         fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
 188         fork.cf_vblocks = 0;
 189         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
 190         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
 191         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
 192         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
 193         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
 194         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
 195         cnattr.ca_blocks = fork.cf_blocks;
 196
 197         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 198                                 &hfsmp->hfs_extents_vp, &newvnode_flags);
 199         if (error) {
 200                 if (HFS_MOUNT_DEBUG) {
 201                         printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
 202                 }
 203                 goto MtVolErr;
 204         }
 205         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 206                                          (KeyCompareProcPtr)CompareExtentKeys));
 207         if (error) {
 208                 if (HFS_MOUNT_DEBUG) {
 209                         printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
 210                 }
 211                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 212                 goto MtVolErr;
 213         }
 214         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 215
 216         /*
 217          * Set up Catalog B-tree vnode...
 218          */
 219         cndesc.cd_nameptr = hfs_catname;
 220         cndesc.cd_namelen = strlen((char *)hfs_catname);
 221         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 222         fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
 223         fork.cf_blocks = fork.cf_size / vcb->blockSize;
 224         fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
 225         fork.cf_vblocks = 0;
 226         fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
 227         fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
 228         fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
 229         fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
 230         fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
 231         fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
 232         cnattr.ca_blocks = fork.cf_blocks;
 233
 234         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 235                                 &hfsmp->hfs_catalog_vp, &newvnode_flags);
 236         if (error) {
 237                 if (HFS_MOUNT_DEBUG) {
 238                         printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
 239                 }
 240                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 241                 goto MtVolErr;
 242         }
 243         error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 244                                          (KeyCompareProcPtr)CompareCatalogKeys));
 245         if (error) {
 246                 if (HFS_MOUNT_DEBUG) {
 247                         printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
 248                 }
 249                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 250                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 251                 goto MtVolErr;
 252         }
 253         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 254
 255         /*
 256          * Set up dummy Allocation file vnode (used only for locking bitmap)
 257          */
 258         cndesc.cd_nameptr = hfs_vbmname;
 259         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 260         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 261         bzero(&fork, sizeof(fork));
 262         cnattr.ca_blocks = 0;
 263
 264         error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
 265                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 266         if (error) {
 267                 if (HFS_MOUNT_DEBUG) {
 268                         printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
 269                 }
 270                 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 271                 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 272                 goto MtVolErr;
 273         }
 274         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 275
 276         /* mark the volume dirty (clear clean unmount bit) */
 277         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 278
 279     if (error == noErr) {
 280                 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
 281                 if (HFS_MOUNT_DEBUG) {
 282                         printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
 283                 }
 284         }
 285
 286     if (error == noErr) {
 287                 /* If the disk isn't write protected.. */
 288         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
 289             MarkVCBDirty (vcb); //      mark VCB dirty so it will be written
 290                 }
 291         }
 292
 293         /*
 294          * all done with system files so we can unlock now...
 295          */
 296         hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
 297         hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 298         hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 299
 300         if (error == noErr) {
 301                 /* If successful, then we can just return once we've unlocked the cnodes */
 302                 return error;
 303         }
 304
 305     //--        Release any resources allocated so far before exiting with an error:
 306 MtVolErr:
 307         hfsUnmount(hfsmp, NULL);
 308
 309     return (error);
 310 }
 311
 312 #endif
 313
 314 //*******************************************************************************
 315 //      Routine:        hfs_MountHFSPlusVolume
 316 //
 317 //
 318 //*******************************************************************************
 319
 320 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 321         off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
 322 {
 323         register ExtendedVCB *vcb;
 324         struct cat_desc cndesc;
 325         struct cat_attr cnattr;
 326         struct cat_fork cfork;
 327         u_int32_t blockSize;
 328         daddr64_t spare_sectors;
 329         struct BTreeInfoRec btinfo;
 330         u_int16_t  signature;
 331         u_int16_t  hfs_version;
 332         int newvnode_flags = 0;
 333         int  i;
 334         OSErr retval;
 335         char converted_volname[256];
 336         size_t volname_length = 0;
 337         size_t conv_volname_length = 0;
 338
 339         signature = SWAP_BE16(vhp->signature);
 340         hfs_version = SWAP_BE16(vhp->version);
 341
 342         if (signature == kHFSPlusSigWord) {
 343                 if (hfs_version != kHFSPlusVersion) {
 344                         printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version);
 345                         return (EINVAL);
 346                 }
 347         } else if (signature == kHFSXSigWord) {
 348                 if (hfs_version != kHFSXVersion) {
 349                         printf("hfs_mount: invalid HFSX version: %x\n", hfs_version);
 350                         return (EINVAL);
 351                 }
 352                 /* The in-memory signature is always 'H+'. */
 353                 signature = kHFSPlusSigWord;
 354                 hfsmp->hfs_flags |= HFS_X;
 355         } else {
 356                 /* Removed printf for invalid HFS+ signature because it gives
 357                  * false error for UFS root volume
 358                  */
 359                 if (HFS_MOUNT_DEBUG) {
 360                         printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature);
 361                 }
 362                 return (EINVAL);
 363         }
 364
 365         /* Block size must be at least 512 and a power of 2 */
 366         blockSize = SWAP_BE32(vhp->blockSize);
 367         if (blockSize < 512 || !powerof2(blockSize)) {
 368                 if (HFS_MOUNT_DEBUG) {
 369                         printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
 370                 }
 371                 return (EINVAL);
 372         }
 373
 374         /* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
 375         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
 376             (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
 377                 if (HFS_MOUNT_DEBUG) {
 378                         printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
 379                 }
 380                 return (EINVAL);
 381         }
 382
 383         /* Make sure we can live with the physical block size. */
 384         if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
 385             (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
 386             (blockSize < hfsmp->hfs_logical_block_size)) {
 387                 if (HFS_MOUNT_DEBUG) {
 388                         printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
 389                                         blockSize, hfsmp->hfs_logical_block_size);
 390                 }
 391                 return (ENXIO);
 392         }
 393
 394         /* If allocation block size is less than the physical
 395          * block size, we assume that the physical block size
 396          * is same as logical block size.  The physical block
 397          * size value is used to round down the offsets for
 398          * reading and writing the primary and alternate volume
 399          * headers at physical block boundary and will cause
 400          * problems if it is less than the block size.
 401          */
 402         if (blockSize < hfsmp->hfs_physical_block_size) {
 403                 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
 404                 hfsmp->hfs_log_per_phys = 1;
 405         }
 406
 407         /*
 408          * The VolumeHeader seems OK: transfer info from it into VCB
 409          * Note - the VCB starts out clear (all zeros)
 410          */
 411         vcb = HFSTOVCB(hfsmp);
 412
 413         vcb->vcbSigWord = signature;
 414         vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
 415         vcb->vcbLsMod   = to_bsd_time(SWAP_BE32(vhp->modifyDate));
 416         vcb->vcbAtrb    = SWAP_BE32(vhp->attributes);
 417         vcb->vcbClpSiz  = SWAP_BE32(vhp->rsrcClumpSize);
 418         vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
 419         vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
 420         vcb->vcbWrCnt   = SWAP_BE32(vhp->writeCount);
 421         vcb->vcbFilCnt  = SWAP_BE32(vhp->fileCount);
 422         vcb->vcbDirCnt  = SWAP_BE32(vhp->folderCount);
 423
 424         /* copy 32 bytes of Finder info */
 425         bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
 426
 427         vcb->vcbAlBlSt = 0;             /* hfs+ allocation blocks start at first block of volume */
 428         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 429                 vcb->vcbWrCnt++;        /* compensate for write of Volume Header on last flush */
 430
 431         /* Now fill in the Extended VCB info */
 432         vcb->nextAllocation     = SWAP_BE32(vhp->nextAllocation);
 433         vcb->totalBlocks        = SWAP_BE32(vhp->totalBlocks);
 434         vcb->allocLimit         = vcb->totalBlocks;
 435         vcb->freeBlocks         = SWAP_BE32(vhp->freeBlocks);
 436         vcb->blockSize          = blockSize;
 437         vcb->encodingsBitmap    = SWAP_BE64(vhp->encodingsBitmap);
 438         vcb->localCreateDate    = SWAP_BE32(vhp->createDate);
 439
 440         vcb->hfsPlusIOPosOffset = embeddedOffset;
 441
 442         /* Default to no free block reserve */
 443         vcb->reserveBlocks = 0;
 444
 445         /*
 446          * Update the logical block size in the mount struct
 447          * (currently set up from the wrapper MDB) using the
 448          * new blocksize value:
 449          */
 450         hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 451         vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
 452
 453         /*
 454          * Validate and initialize the location of the alternate volume header.
 455          */
 456         spare_sectors = hfsmp->hfs_logical_block_count -
 457                         (((daddr64_t)vcb->totalBlocks * blockSize) /
 458                            hfsmp->hfs_logical_block_size);
 459
 460         if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
 461                 hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
 462         } else {
 463                 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
 464                                            HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
 465                                                           hfsmp->hfs_logical_block_count);
 466         }
 467
 468         bzero(&cndesc, sizeof(cndesc));
 469         cndesc.cd_parentcnid = kHFSRootParentID;
 470         cndesc.cd_flags |= CD_ISMETA;
 471         bzero(&cnattr, sizeof(cnattr));
 472         cnattr.ca_linkcount = 1;
 473         cnattr.ca_mode = S_IFREG;
 474
 475         /*
 476          * Set up Extents B-tree vnode
 477          */
 478         cndesc.cd_nameptr = hfs_extname;
 479         cndesc.cd_namelen = strlen((char *)hfs_extname);
 480         cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 481
 482         cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
 483         cfork.cf_new_size= 0;
 484         cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
 485         cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
 486         cfork.cf_vblocks = 0;
 487         cnattr.ca_blocks = cfork.cf_blocks;
 488         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 489                 cfork.cf_extents[i].startBlock =
 490                                 SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
 491                 cfork.cf_extents[i].blockCount =
 492                                 SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
 493         }
 494         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 495                                  &hfsmp->hfs_extents_vp, &newvnode_flags);
 496         if (retval)
 497         {
 498                 if (HFS_MOUNT_DEBUG) {
 499                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
 500                 }
 501                 goto ErrorExit;
 502         }
 503         hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
 504         hfs_unlock(hfsmp->hfs_extents_cp);
 505
 506         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 507                                           (KeyCompareProcPtr) CompareExtentKeysPlus));
 508         if (retval)
 509         {
 510                 if (HFS_MOUNT_DEBUG) {
 511                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
 512                 }
 513                 goto ErrorExit;
 514         }
 515         /*
 516          * Set up Catalog B-tree vnode
 517          */
 518         cndesc.cd_nameptr = hfs_catname;
 519         cndesc.cd_namelen = strlen((char *)hfs_catname);
 520         cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
 521
 522         cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
 523         cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
 524         cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
 525         cfork.cf_vblocks = 0;
 526         cnattr.ca_blocks = cfork.cf_blocks;
 527         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 528                 cfork.cf_extents[i].startBlock =
 529                                 SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
 530                 cfork.cf_extents[i].blockCount =
 531                                 SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
 532         }
 533         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 534                                  &hfsmp->hfs_catalog_vp, &newvnode_flags);
 535         if (retval) {
 536                 if (HFS_MOUNT_DEBUG) {
 537                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
 538                 }
 539                 goto ErrorExit;
 540         }
 541         hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
 542         hfs_unlock(hfsmp->hfs_catalog_cp);
 543
 544         retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 545                                           (KeyCompareProcPtr) CompareExtendedCatalogKeys));
 546         if (retval) {
 547                 if (HFS_MOUNT_DEBUG) {
 548                         printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
 549                 }
 550                 goto ErrorExit;
 551         }
 552         if ((hfsmp->hfs_flags & HFS_X) &&
 553             BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
 554                 if (btinfo.keyCompareType == kHFSBinaryCompare) {
 555                         hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
 556                         /* Install a case-sensitive key compare */
 557                         (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 558                                           (KeyCompareProcPtr)cat_binarykeycompare);
 559                 }
 560         }
 561
 562         /*
 563          * Set up Allocation file vnode
 564          */
 565         cndesc.cd_nameptr = hfs_vbmname;
 566         cndesc.cd_namelen = strlen((char *)hfs_vbmname);
 567         cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
 568
 569         cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
 570         cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
 571         cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
 572         cfork.cf_vblocks = 0;
 573         cnattr.ca_blocks = cfork.cf_blocks;
 574         for (i = 0; i < kHFSPlusExtentDensity; i++) {
 575                 cfork.cf_extents[i].startBlock =
 576                                 SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
 577                 cfork.cf_extents[i].blockCount =
 578                                 SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
 579         }
 580         retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 581                                  &hfsmp->hfs_allocation_vp, &newvnode_flags);
 582         if (retval) {
 583                 if (HFS_MOUNT_DEBUG) {
 584                         printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
 585                 }
 586                 goto ErrorExit;
 587         }
 588         hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 589         hfs_unlock(hfsmp->hfs_allocation_cp);
 590
 591         /*
 592          * Set up Attribute B-tree vnode
 593          */
 594         if (vhp->attributesFile.totalBlocks != 0) {
 595                 cndesc.cd_nameptr = hfs_attrname;
 596                 cndesc.cd_namelen = strlen((char *)hfs_attrname);
 597                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;
 598
 599                 cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
 600                 cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
 601                 cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
 602                 cfork.cf_vblocks = 0;
 603                 cnattr.ca_blocks = cfork.cf_blocks;
 604                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 605                         cfork.cf_extents[i].startBlock =
 606                                         SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
 607                         cfork.cf_extents[i].blockCount =
 608                                         SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
 609                 }
 610                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 611                                          &hfsmp->hfs_attribute_vp, &newvnode_flags);
 612                 if (retval) {
 613                         if (HFS_MOUNT_DEBUG) {
 614                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
 615                         }
 616                         goto ErrorExit;
 617                 }
 618                 hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
 619                 hfs_unlock(hfsmp->hfs_attribute_cp);
 620                 retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
 621                                                   (KeyCompareProcPtr) hfs_attrkeycompare));
 622                 if (retval) {
 623                         if (HFS_MOUNT_DEBUG) {
 624                                 printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
 625                         }
 626                         goto ErrorExit;
 627                 }
 628
 629                 /* Initialize vnode for virtual attribute data file that spans the
 630                  * entire file system space for performing I/O to attribute btree
 631                  * We hold iocount on the attrdata vnode for the entire duration
 632                  * of mount (similar to btree vnodes)
 633                  */
 634                 retval = init_attrdata_vnode(hfsmp);
 635                 if (retval) {
 636                         if (HFS_MOUNT_DEBUG) {
 637                                 printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
 638                         }
 639                         goto ErrorExit;
 640                 }
 641         }
 642
 643         /*
 644          * Set up Startup file vnode
 645          */
 646         if (vhp->startupFile.totalBlocks != 0) {
 647                 cndesc.cd_nameptr = hfs_startupname;
 648                 cndesc.cd_namelen = strlen((char *)hfs_startupname);
 649                 cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;
 650
 651                 cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
 652                 cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
 653                 cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
 654                 cfork.cf_vblocks = 0;
 655                 cnattr.ca_blocks = cfork.cf_blocks;
 656                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
 657                         cfork.cf_extents[i].startBlock =
 658                                         SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
 659                         cfork.cf_extents[i].blockCount =
 660                                         SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
 661                 }
 662                 retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
 663                                          &hfsmp->hfs_startup_vp, &newvnode_flags);
 664                 if (retval) {
 665                         if (HFS_MOUNT_DEBUG) {
 666                                 printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
 667                         }
 668                         goto ErrorExit;
 669                 }
 670                 hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
 671                 hfs_unlock(hfsmp->hfs_startup_cp);
 672         }
 673
 674         /*
 675          * Pick up volume name and create date
 676          *
 677          * Acquiring the volume name should not manipulate the bitmap, only the catalog
 678          * btree and possibly the extents overflow b-tree.
 679          */
 680         retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
 681         if (retval) {
 682                 if (HFS_MOUNT_DEBUG) {
 683                         printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
 684                 }
 685                 goto ErrorExit;
 686         }
 687         vcb->hfs_itime = cnattr.ca_itime;
 688         vcb->volumeNameEncodingHint = cndesc.cd_encoding;
 689         bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
 690         volname_length = strlen ((const char*)vcb->vcbVN);
 691         cat_releasedesc(&cndesc);
 692
 693 #define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
 694
 695
 696         /* Send the volume name down to CoreStorage if necessary */
 697         retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
 698         if (retval == 0) {
 699                 (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
 700         }
 701
 702         /* reset retval == 0. we don't care about errors in volname conversion */
 703         retval = 0;
 704
 705
 706         /*
 707          * We now always initiate a full bitmap scan even if the volume is read-only because this is
 708          * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
 709          * expects. TRIMs will not be delivered to the underlying media if the volume is not
 710          * read-write though.
 711          */
 712         thread_t allocator_scanner;
 713         hfsmp->scan_var = 0;
 714
 715         /* Take the HFS mount mutex and wait on scan_var */
 716         hfs_lock_mount (hfsmp);
 717
 718         kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
 719         /* Wait until it registers that it's got the appropriate locks */
 720         while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
 721                 (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
 722                 if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
 723                         break;
 724                 }
 725                 else {
 726                         hfs_lock_mount (hfsmp);
 727                 }
 728         }
 729
 730         thread_deallocate (allocator_scanner);
 731
 732         /* mark the volume dirty (clear clean unmount bit) */
 733         vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 734         if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
 735                 hfs_flushvolumeheader(hfsmp, TRUE, 0);
 736         }
 737
 738         /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
 739         if ((hfsmp->hfs_flags & HFS_X) != 0) {
 740                 hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
 741         }
 742
 743         //
 744         // Check if we need to do late journal initialization.  This only
 745         // happens if a previous version of MacOS X (or 9) touched the disk.
 746         // In that case hfs_late_journal_init() will go re-locate the journal
 747         // and journal_info_block files and validate that they're still kosher.
 748         //
 749         if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
 750                 && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
 751                 && (hfsmp->jnl == NULL)) {
 752
 753                 retval = hfs_late_journal_init(hfsmp, vhp, args);
 754                 if (retval != 0) {
 755                         if (retval == EROFS) {
 756                                 // EROFS is a special error code that means the volume has an external
 757                                 // journal which we couldn't find.  in that case we do not want to
 758                                 // rewrite the volume header - we'll just refuse to mount the volume.
 759                                 if (HFS_MOUNT_DEBUG) {
 760                                         printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
 761                                 }
 762                                 retval = EINVAL;
 763                                 goto ErrorExit;
 764                         }
 765
 766                         hfsmp->jnl = NULL;
 767
 768                         // if the journal failed to open, then set the lastMountedVersion
 769                         // to be "FSK!" which fsck_hfs will see and force the fsck instead
 770                         // of just bailing out because the volume is journaled.
 771                         if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
 772                                 HFSPlusVolumeHeader *jvhp;
 773                                 daddr64_t mdb_offset;
 774                                 struct buf *bp = NULL;
 775
 776                                 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
 777
 778                                 mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
 779
 780                                 bp = NULL;
 781                                 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
 782                                                 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
 783                                                 hfsmp->hfs_physical_block_size, cred, &bp);
 784                                 if (retval == 0) {
 785                                         jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 786
 787                                         if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
 788                                                 printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
 789                                                 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
 790                                                 buf_bwrite(bp);
 791                                         } else {
 792                                                 buf_brelse(bp);
 793                                         }
 794                                         bp = NULL;
 795                                 } else if (bp) {
 796                                         buf_brelse(bp);
 797                                         // clear this so the error exit path won't try to use it
 798                                         bp = NULL;
 799                             }
 800                         }
 801
 802                         if (HFS_MOUNT_DEBUG) {
 803                                 printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
 804                         }
 805                         retval = EINVAL;
 806                         goto ErrorExit;
 807                 } else if (hfsmp->jnl) {
 808                         vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 809                 }
 810         } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
 811                 struct cat_attr jinfo_attr, jnl_attr;
 812
 813                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 814                     vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
 815                 }
 816
 817                 // if we're here we need to fill in the fileid's for the
 818                 // journal and journal_info_block.
 819                 hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
 820                 hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
 821                 if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
 822                         printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
 823                         printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
 824                 }
 825
 826                 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 827                     vcb->vcbAtrb |= kHFSVolumeJournaledMask;
 828                 }
 829
 830                 if (hfsmp->jnl == NULL) {
 831                     vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 832                 }
 833         }
 834
 835         if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
 836         {
 837                 MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
 838         }
 839
 840         /*
 841          * Distinguish 3 potential cases involving content protection:
 842          * 1. mount point bit set; vcbAtrb does not support it. Fail.
 843          * 2. mount point bit set; vcbattrb supports it. we're good.
 844          * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good.
 845          */
 846         if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
 847                 /* Does the mount point support it ? */
 848                 if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
 849                         /* Case 1 above */
 850                         retval = EINVAL;
 851                         goto ErrorExit;
 852                 }
 853         }
 854         else {
 855                 /* not requested in the mount point. Is it in FS? */
 856                 if (vcb->vcbAtrb & kHFSContentProtectionMask) {
 857                         /* Case 3 above */
 858                         vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
 859                 }
 860         }
 861
 862         /* At this point, if the mount point flag is set, we can enable it. */
 863         if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
 864                 /* Cases 2+3 above */
 865 #if CONFIG_PROTECT
 866                 /* Get the EAs as needed. */
 867                 int cperr = 0;
 868                 uint16_t majorversion;
 869                 uint16_t minorversion;
 870
 871                 struct cp_root_xattr *xattr = NULL;
 872                 MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
 873                 if (xattr == NULL) {
 874                         retval = ENOMEM;
 875                         goto ErrorExit;
 876                 }
 877                 bzero (xattr, sizeof(struct cp_root_xattr));
 878
 879                 /* go get the EA to get the version information */
 880                 cperr = cp_getrootxattr (hfsmp, xattr);
 881                 /*
 882                  * If there was no EA there, then write one out.
 883                  * Assuming EA is not present on the root means
 884                  * this is an erase install or a very old FS
 885                  */
 886
 887                 if (cperr == 0) {
 888                         /* Have to run a valid CP version. */
 889                         if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
 890                                 cperr = EINVAL;
 891                         }
 892                 }
 893                 else if (cperr == ENOATTR) {
 894                         printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
 895                         bzero(xattr, sizeof(struct cp_root_xattr));
 896                         xattr->major_version = CP_NEW_MAJOR_VERS;
 897                         xattr->minor_version = CP_MINOR_VERS;
 898                         xattr->flags = 0;
 899                         cperr = cp_setrootxattr (hfsmp, xattr);
 900                 }
 901                 majorversion = xattr->major_version;
 902                 minorversion = xattr->minor_version;
 903                 if (xattr) {
 904                         FREE(xattr, M_TEMP);
 905                 }
 906
 907                 /* Recheck for good status */
 908                 if (cperr == 0) {
 909                         /* If we got here, then the CP version is valid. Set it in the mount point */
 910                         hfsmp->hfs_running_cp_major_vers = majorversion;
 911                         printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
 912
 913                         /*
 914                          * Acquire the boot-arg for the AKS default key.
 915                          * Ensure that the boot-arg's value is valid for FILES (not directories),
 916                          * since only files are actually protected for now.
 917                          */
 918                         PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
 919                         if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
 920                                 hfsmp->default_cp_class = PROTECTION_CLASS_D;
 921                         }
 922                 }
 923                 else {
 924                         retval = EPERM;
 925                         goto ErrorExit;
 926                 }
 927 #else
 928                 /* If CONFIG_PROTECT not built, ignore CP */
 929                 vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
 930 #endif
 931         }
 932
 933         /*
 934          * Establish a metadata allocation zone.
 935          */
 936         hfs_metadatazone_init(hfsmp, false);
 937
 938         /*
 939          * Make any metadata zone adjustments.
 940          */
 941         if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
 942                 /* Keep the roving allocator out of the metadata zone. */
 943                 if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
 944                     vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
 945                         HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
 946                 }
 947         } else {
 948                 if (vcb->nextAllocation <= 1) {
 949                         vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
 950                 }
 951         }
 952         vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;
 953
 954         /* Setup private/hidden directories for hardlinks. */
 955         hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
 956         hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
 957
 958         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 959                 hfs_remove_orphans(hfsmp);
 960
 961         /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
 962         if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 963         {
 964                 retval = hfs_erase_unused_nodes(hfsmp);
 965                 if (retval) {
 966                         if (HFS_MOUNT_DEBUG) {
 967                                 printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
 968                         }
 969
 970                         goto ErrorExit;
 971                 }
 972         }
 973
 974         /*
 975          * Allow hot file clustering if conditions allow.
 976          */
 977         if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
 978             ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
 979                 (void) hfs_recording_init(hfsmp);
 980         }
 981
 982         /* Force ACLs on HFS+ file systems. */
 983         vfs_setextendedsecurity(HFSTOVFS(hfsmp));
 984
 985         /* Enable extent-based extended attributes by default */
 986         hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
 987
 988         return (0);
 989
 990 ErrorExit:
 991         /*
 992          * A fatal error occurred and the volume cannot be mounted, so
 993          * release any resources that we acquired...
 994          */
 995         hfsUnmount(hfsmp, NULL);
 996
 997         if (HFS_MOUNT_DEBUG) {
 998                 printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
 999         }
1000         return (retval);
1001 }
1002
1003
1004 /*
1005  * ReleaseMetaFileVNode
1006  *
1007  * vp   L - -
1008  */
1009 static void ReleaseMetaFileVNode(struct vnode *vp)
1010 {
1011         struct filefork *fp;
1012
1013         if (vp && (fp = VTOF(vp))) {
1014                 if (fp->fcbBTCBPtr != NULL) {
1015                         (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1016                         (void) BTClosePath(fp);
1017                         hfs_unlock(VTOC(vp));
1018                 }
1019
1020                 /* release the node even if BTClosePath fails */
1021                 vnode_recycle(vp);
1022                 vnode_put(vp);
1023         }
1024 }
1025
1026
1027 /*************************************************************
1028 *
1029 * Unmounts a hfs volume.
1030 *       At this point vflush() has been called (to dump all non-metadata files)
1031 *
1032 *************************************************************/
1033
1034 int
1035 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
1036 {
1037         /* Get rid of our attribute data vnode (if any).  This is done
1038          * after the vflush() during mount, so we don't need to worry
1039          * about any locks.
1040          */
1041         if (hfsmp->hfs_attrdata_vp) {
1042                 ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
1043                 hfsmp->hfs_attrdata_vp = NULLVP;
1044         }
1045
1046         if (hfsmp->hfs_startup_vp) {
1047                 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
1048                 hfsmp->hfs_startup_cp = NULL;
1049                 hfsmp->hfs_startup_vp = NULL;
1050         }
1051
1052         if (hfsmp->hfs_attribute_vp) {
1053                 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
1054                 hfsmp->hfs_attribute_cp = NULL;
1055                 hfsmp->hfs_attribute_vp = NULL;
1056         }
1057
1058         if (hfsmp->hfs_catalog_vp) {
1059                 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
1060                 hfsmp->hfs_catalog_cp = NULL;
1061                 hfsmp->hfs_catalog_vp = NULL;
1062         }
1063
1064         if (hfsmp->hfs_extents_vp) {
1065                 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
1066                 hfsmp->hfs_extents_cp = NULL;
1067                 hfsmp->hfs_extents_vp = NULL;
1068         }
1069
1070         if (hfsmp->hfs_allocation_vp) {
1071                 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
1072                 hfsmp->hfs_allocation_cp = NULL;
1073                 hfsmp->hfs_allocation_vp = NULL;
1074         }
1075
1076         return (0);
1077 }
1078
1079
1080 /*
1081  * Test if fork has overflow extents.
1082  *
1083  * Returns:
1084  *      non-zero - overflow extents exist
1085  *      zero     - overflow extents do not exist
1086  */
1087 __private_extern__
1088 int
1089 overflow_extents(struct filefork *fp)
1090 {
1091         u_int32_t blocks;
1092
1093         //
1094         // If the vnode pointer is NULL then we're being called
1095         // from hfs_remove_orphans() with a faked-up filefork
1096         // and therefore it has to be an HFS+ volume.  Otherwise
1097         // we check through the volume header to see what type
1098         // of volume we're on.
1099         //
1100         if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
1101                 if (fp->ff_extents[7].blockCount == 0)
1102                         return (0);
1103
1104                 blocks = fp->ff_extents[0].blockCount +
1105                          fp->ff_extents[1].blockCount +
1106                          fp->ff_extents[2].blockCount +
1107                          fp->ff_extents[3].blockCount +
1108                          fp->ff_extents[4].blockCount +
1109                          fp->ff_extents[5].blockCount +
1110                          fp->ff_extents[6].blockCount +
1111                          fp->ff_extents[7].blockCount;
1112         } else {
1113                 if (fp->ff_extents[2].blockCount == 0)
1114                         return false;
1115
1116                 blocks = fp->ff_extents[0].blockCount +
1117                          fp->ff_extents[1].blockCount +
1118                          fp->ff_extents[2].blockCount;
1119           }
1120
1121         return (fp->ff_blocks > blocks);
1122 }
1123
1124 /*
1125  * Lock the HFS global journal lock
1126  */
1127 int
1128 hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
1129 {
1130         void *thread = current_thread();
1131
1132         if (hfsmp->hfs_global_lockowner == thread) {
1133                 panic ("hfs_lock_global: locking against myself!");
1134         }
1135
1136     /* HFS_SHARED_LOCK */
1137         if (locktype == HFS_SHARED_LOCK) {
1138                 lck_rw_lock_shared (&hfsmp->hfs_global_lock);
1139                 hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
1140         }
1141     /* HFS_EXCLUSIVE_LOCK */
1142         else {
1143                 lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
1144                 hfsmp->hfs_global_lockowner = thread;
1145         }
1146
1147         return 0;
1148 }
1149
1150
1151 /*
1152  * Unlock the HFS global journal lock
1153  */
1154 void
1155 hfs_unlock_global (struct hfsmount *hfsmp)
1156 {
1157
1158         void *thread = current_thread();
1159
1160     /* HFS_LOCK_EXCLUSIVE */
1161         if (hfsmp->hfs_global_lockowner == thread) {
1162                 hfsmp->hfs_global_lockowner = NULL;
1163                 lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
1164         }
1165     /* HFS_LOCK_SHARED */
1166         else {
1167                 lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
1168         }
1169 }
1170
1171 /*
1172  * Lock the HFS mount lock
1173  *
1174  * Note: this is a mutex, not a rw lock!
1175  */
1176 inline
1177 void hfs_lock_mount (struct hfsmount *hfsmp) {
1178         lck_mtx_lock (&(hfsmp->hfs_mutex));
1179 }
1180
1181 /*
1182  * Unlock the HFS mount lock
1183  *
1184  * Note: this is a mutex, not a rw lock!
1185  */
1186 inline
1187 void hfs_unlock_mount (struct hfsmount *hfsmp) {
1188         lck_mtx_unlock (&(hfsmp->hfs_mutex));
1189 }
1190
1191 /*
1192  * Lock HFS system file(s).
1193  */
1194 int
1195 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1196 {
1197         /*
1198          * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1199          */
1200         if (flags & SFL_CATALOG) {
1201 #ifdef HFS_CHECK_LOCK_ORDER
1202                 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1203                         panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1204                 }
1205                 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1206                         panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1207                 }
1208                 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1209                         panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1210                 }
1211 #endif /* HFS_CHECK_LOCK_ORDER */
1212
1213                 if (hfsmp->hfs_catalog_cp) {
1214                         (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1215                         /*
1216                          * When the catalog file has overflow extents then
1217                          * also acquire the extents b-tree lock if its not
1218                          * already requested.
1219                          */
1220                         if (((flags & SFL_EXTENTS) == 0) &&
1221                             (hfsmp->hfs_catalog_vp != NULL) &&
1222                             (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1223                                 flags |= SFL_EXTENTS;
1224                         }
1225                 } else {
1226                         flags &= ~SFL_CATALOG;
1227                 }
1228         }
1229
1230         if (flags & SFL_ATTRIBUTE) {
1231 #ifdef HFS_CHECK_LOCK_ORDER
1232                 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1233                         panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1234                 }
1235                 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1236                         panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1237                 }
1238 #endif /* HFS_CHECK_LOCK_ORDER */
1239
1240                 if (hfsmp->hfs_attribute_cp) {
1241                         (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1242                         /*
1243                          * When the attribute file has overflow extents then
1244                          * also acquire the extents b-tree lock if its not
1245                          * already requested.
1246                          */
1247                         if (((flags & SFL_EXTENTS) == 0) &&
1248                             (hfsmp->hfs_attribute_vp != NULL) &&
1249                             (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1250                                 flags |= SFL_EXTENTS;
1251                         }
1252                 } else {
1253                         flags &= ~SFL_ATTRIBUTE;
1254                 }
1255         }
1256
1257         if (flags & SFL_STARTUP) {
1258 #ifdef HFS_CHECK_LOCK_ORDER
1259                 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1260                         panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1261                 }
1262 #endif /* HFS_CHECK_LOCK_ORDER */
1263
1264                 if (hfsmp->hfs_startup_cp) {
1265                         (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1266                         /*
1267                          * When the startup file has overflow extents then
1268                          * also acquire the extents b-tree lock if its not
1269                          * already requested.
1270                          */
1271                         if (((flags & SFL_EXTENTS) == 0) &&
1272                             (hfsmp->hfs_startup_vp != NULL) &&
1273                             (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1274                                 flags |= SFL_EXTENTS;
1275                         }
1276                 } else {
1277                         flags &= ~SFL_STARTUP;
1278                 }
1279         }
1280
1281         /*
1282          * To prevent locks being taken in the wrong order, the extent lock
1283          * gets a bitmap lock as well.
1284          */
1285         if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1286                 if (hfsmp->hfs_allocation_cp) {
1287                         (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1288                         /*
1289                          * The bitmap lock is also grabbed when only extent lock
1290                          * was requested. Set the bitmap lock bit in the lock
1291                          * flags which callers will use during unlock.
1292                          */
1293                         flags |= SFL_BITMAP;
1294                 } else {
1295                         flags &= ~SFL_BITMAP;
1296                 }
1297         }
1298
1299         if (flags & SFL_EXTENTS) {
1300                 /*
1301                  * Since the extents btree lock is recursive we always
1302                  * need exclusive access.
1303                  */
1304                 if (hfsmp->hfs_extents_cp) {
1305                         (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1306                 } else {
1307                         flags &= ~SFL_EXTENTS;
1308                 }
1309         }
1310
1311         return (flags);
1312 }
1313
1314 /*
1315  * unlock HFS system file(s).
1316  */
1317 void
1318 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1319 {
1320         struct timeval tv;
1321         u_int32_t lastfsync;
1322         int numOfLockedBuffs;
1323
1324         if (hfsmp->jnl == NULL) {
1325                 microuptime(&tv);
1326                 lastfsync = tv.tv_sec;
1327         }
1328         if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1329                 hfs_unlock(hfsmp->hfs_startup_cp);
1330         }
1331         if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1332                 if (hfsmp->jnl == NULL) {
1333                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1334                         numOfLockedBuffs = count_lock_queue();
1335                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1336                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1337                               kMaxSecsForFsync))) {
1338                                 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1339                         }
1340                 }
1341                 hfs_unlock(hfsmp->hfs_attribute_cp);
1342         }
1343         if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1344                 if (hfsmp->jnl == NULL) {
1345                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1346                         numOfLockedBuffs = count_lock_queue();
1347                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1348                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1349                               kMaxSecsForFsync))) {
1350                                 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1351                         }
1352                 }
1353                 hfs_unlock(hfsmp->hfs_catalog_cp);
1354         }
1355         if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1356                 hfs_unlock(hfsmp->hfs_allocation_cp);
1357         }
1358         if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1359                 if (hfsmp->jnl == NULL) {
1360                         BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1361                         numOfLockedBuffs = count_lock_queue();
1362                         if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1363                             ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1364                               kMaxSecsForFsync))) {
1365                                 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1366                         }
1367                 }
1368                 hfs_unlock(hfsmp->hfs_extents_cp);
1369         }
1370 }
1371
1372
/*
 * RequireFileLock
 *
 * Debug-only check that the calling thread owns the cnode lock of a
 * system file vnode.  Panics when the check fails.
 *
 * vp        - vnode whose cnode (VTOC(vp)) is examined.
 * shareable - non-zero when the caller does not require exclusive
 *             ownership; in that case nothing is enforced.  The value is
 *             forced to zero for the extents btree and the allocation
 *             bitmap, which are always exclusive.
 *
 * The check compares the cnode's c_lockowner with current_thread().
 * This is to be used for debugging purposes only (HFS_DIAGNOSTIC builds).
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
	int locked;

	/* The extents btree and allocation bitmap are always exclusive. */
	if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
	    VTOC(vp)->c_fileid == kHFSAllocationFileID) {
		shareable = 0;
	}

	locked = VTOC(vp)->c_lockowner == (void *)current_thread();

	if (!locked && !shareable) {
		/*
		 * The vnode pointer is printed with %p so that it is not
		 * truncated on targets where pointers are wider than u_int.
		 */
		switch (VTOC(vp)->c_fileid) {
		case kHFSExtentsFileID:
			panic("hfs: extents btree not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSCatalogFileID:
			panic("hfs: catalog btree not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSAllocationFileID:
			/* The allocation file can hide behind the journal lock. */
			if (VTOHFS(vp)->jnl == NULL)
				panic("hfs: allocation file not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSStartupFileID:
			panic("hfs: startup file not locked! v: %p\n #\n", (void *)vp);
			break;
		case kHFSAttributesFileID:
			panic("hfs: attributes btree not locked! v: %p\n #\n", (void *)vp);
			break;
		default:
			/* Other file IDs are not checked here. */
			break;
		}
	}
}
#endif
1414
1415
1416 /*
1417  * There are three ways to qualify for ownership rights on an object:
1418  *
1419  * 1. (a) Your UID matches the cnode's UID.
1420  *    (b) The object in question is owned by "unknown"
1421  * 2. (a) Permissions on the filesystem are being ignored and
1422  *        your UID matches the replacement UID.
1423  *    (b) Permissions on the filesystem are being ignored and
1424  *        the replacement UID is "unknown".
1425  * 3. You are root.
1426  *
1427  */
1428 int
1429 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1430                 __unused struct proc *p, int invokesuperuserstatus)
1431 {
1432         if ((kauth_cred_getuid(cred) == cnode_uid) ||                                    /* [1a] */
1433             (cnode_uid == UNKNOWNUID) ||                                                                          /* [1b] */
1434             ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&          /* [2] */
1435               ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                            /* [2a] */
1436                 (hfsmp->hfs_uid == UNKNOWNUID))) ||                           /* [2b] */
1437             (invokesuperuserstatus && (suser(cred, 0) == 0))) {    /* [3] */
1438                 return (0);
1439         } else {
1440                 return (EPERM);
1441         }
1442 }
1443
1444
/*
 * BestBlockSizeFit
 *
 * Compute the logical block size to use for a given allocation block size.
 *
 * allocationBlockSize - size of an allocation block, in bytes.
 * blockSizeLimit      - largest block size that may be returned by the
 *                       search loop.
 * baseMultiple        - the result of the search loop is always a multiple
 *                       of this value.
 *
 * Returns:
 *   - 512 when baseMultiple is zero or allocationBlockSize is not a
 *     multiple of baseMultiple (no even division can be a multiple either),
 *   - PAGE_SIZE when allocationBlockSize is a multiple of PAGE_SIZE,
 *   - otherwise the largest multiple of baseMultiple that evenly divides
 *     allocationBlockSize and does not exceed blockSizeLimit,
 *   - 512 when no such size exists (e.g. blockSizeLimit < baseMultiple).
 */
u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
                           u_int32_t blockSizeLimit,
                           u_int32_t baseMultiple) {
	u_int32_t blockCount;
	u_int32_t limitCount;
	u_int32_t trialBlockSize;

	/* A zero base would make every modulo/division below undefined. */
	if (baseMultiple == 0) {
		return 512;		/* Hope for the best */
	}

	if (allocationBlockSize % baseMultiple != 0) {
		/*
		 * The allocation blocks aren't even multiples of the specified
		 * base: no amount of dividing them into even parts will be a
		 * multiple either.
		 */
		return 512;		/* Hope for the best */
	}

	/*
	 * Try the obvious winner first, to prevent 12K allocation blocks, for
	 * instance, from being handled as two 6K logical blocks instead of
	 * three 4K logical blocks.  Even though the former (the result of the
	 * loop below) is the larger block size, the latter is more efficient.
	 */
	if (allocationBlockSize % PAGE_SIZE == 0) {
		return PAGE_SIZE;
	}

	/*
	 * No clear winner exists: pick the largest even fraction that fits the
	 * limit.  Candidates above the limit can never be returned, so start
	 * the search at the largest count whose size is <= blockSizeLimit.
	 * blockCount * baseMultiple never exceeds allocationBlockSize, so the
	 * multiplication cannot overflow.
	 */
	blockCount = allocationBlockSize / baseMultiple;
	limitCount = blockSizeLimit / baseMultiple;
	if (blockCount > limitCount) {
		blockCount = limitCount;
	}

	for (; blockCount > 0; --blockCount) {
		trialBlockSize = blockCount * baseMultiple;
		if (allocationBlockSize % trialBlockSize == 0) {	/* An even fraction? */
			return trialBlockSize;
		}
	}

	/* Reached when not even a single baseMultiple fits under the limit. */
	return 512;
}
1486
1487
1488 u_int32_t
1489 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1490                         struct cat_attr *fattr, struct cat_fork *forkinfo)
1491 {
1492         struct hfsmount * hfsmp;
1493         struct cat_desc jdesc;
1494         int lockflags;
1495         int error;
1496
1497         if (vcb->vcbSigWord != kHFSPlusSigWord)
1498                 return (0);
1499
1500         hfsmp = VCBTOHFS(vcb);
1501
1502         memset(&jdesc, 0, sizeof(struct cat_desc));
1503         jdesc.cd_parentcnid = kRootDirID;
1504         jdesc.cd_nameptr = (const u_int8_t *)name;
1505         jdesc.cd_namelen = strlen(name);
1506
1507         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1508         error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1509         hfs_systemfile_unlock(hfsmp, lockflags);
1510
1511         if (error == 0) {
1512                 return (fattr->ca_fileid);
1513         } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1514                 return (0);
1515         }
1516
1517         return (0);     /* XXX what callers expect on an error */
1518 }
1519
1520
1521 /*
1522  * On HFS Plus Volumes, there can be orphaned files or directories
1523  * These are files or directories that were unlinked while busy.
1524  * If the volume was not cleanly unmounted then some of these may
1525  * have persisted and need to be removed.
1526  */
1527 void
1528 hfs_remove_orphans(struct hfsmount * hfsmp)
1529 {
1530         struct BTreeIterator * iterator = NULL;
1531         struct FSBufferDescriptor btdata;
1532         struct HFSPlusCatalogFile filerec;
1533         struct HFSPlusCatalogKey * keyp;
1534         struct proc *p = current_proc();
1535         FCB *fcb;
1536         ExtendedVCB *vcb;
1537         char filename[32];
1538         char tempname[32];
1539         size_t namelen;
1540         cat_cookie_t cookie;
1541         int catlock = 0;
1542         int catreserve = 0;
1543         int started_tr = 0;
1544         int lockflags;
1545         int result;
1546         int orphaned_files = 0;
1547         int orphaned_dirs = 0;
1548
1549         bzero(&cookie, sizeof(cookie));
1550
1551         if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1552                 return;
1553
1554         vcb = HFSTOVCB(hfsmp);
1555         fcb = VTOF(hfsmp->hfs_catalog_vp);
1556
1557         btdata.bufferAddress = &filerec;
1558         btdata.itemSize = sizeof(filerec);
1559         btdata.itemCount = 1;
1560
1561         MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1562         bzero(iterator, sizeof(*iterator));
1563
1564         /* Build a key to "temp" */
1565         keyp = (HFSPlusCatalogKey*)&iterator->key;
1566         keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1567         keyp->nodeName.length = 4;  /* "temp" */
1568         keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1569         keyp->nodeName.unicode[0] = 't';
1570         keyp->nodeName.unicode[1] = 'e';
1571         keyp->nodeName.unicode[2] = 'm';
1572         keyp->nodeName.unicode[3] = 'p';
1573
1574         /*
1575          * Position the iterator just before the first real temp file/dir.
1576          */
1577         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1578         (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1579         hfs_systemfile_unlock(hfsmp, lockflags);
1580
1581         /* Visit all the temp files/dirs in the HFS+ private directory. */
1582         for (;;) {
1583                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1584                 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1585                 hfs_systemfile_unlock(hfsmp, lockflags);
1586                 if (result)
1587                         break;
1588                 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1589                         break;
1590
1591                 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1592                                       (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1593
1594                 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1595                                 HFS_DELETE_PREFIX, filerec.fileID);
1596
1597                 /*
1598                  * Delete all files (and directories) named "tempxxx",
1599                  * where xxx is the file's cnid in decimal.
1600                  *
1601                  */
1602                 if (bcmp(tempname, filename, namelen) == 0) {
1603                         struct filefork dfork;
1604                 struct filefork rfork;
1605                         struct cnode cnode;
1606                         int mode = 0;
1607
1608                         bzero(&dfork, sizeof(dfork));
1609                         bzero(&rfork, sizeof(rfork));
1610                         bzero(&cnode, sizeof(cnode));
1611
1612                         /* Delete any attributes, ignore errors */
1613                         (void) hfs_removeallattr(hfsmp, filerec.fileID);
1614
1615                         if (hfs_start_transaction(hfsmp) != 0) {
1616                             printf("hfs_remove_orphans: failed to start transaction\n");
1617                             goto exit;
1618                         }
1619                         started_tr = 1;
1620
1621                         /*
1622                          * Reserve some space in the Catalog file.
1623                          */
1624                         if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1625                             printf("hfs_remove_orphans: cat_preflight failed\n");
1626                                 goto exit;
1627                         }
1628                         catreserve = 1;
1629
1630                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1631                         catlock = 1;
1632
1633                         /* Build a fake cnode */
1634                         cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1635                                         &dfork.ff_data, &rfork.ff_data);
1636                         cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1637                         cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1638                         cnode.c_desc.cd_namelen = namelen;
1639                         cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1640                         cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1641
1642                         /* Position iterator at previous entry */
1643                         if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1644                             NULL, NULL) != 0) {
1645                                 break;
1646                         }
1647
1648                         /* Truncate the file to zero (both forks) */
1649                         if (dfork.ff_blocks > 0) {
1650                                 u_int64_t fsize;
1651
1652                                 dfork.ff_cp = &cnode;
1653                                 cnode.c_datafork = &dfork;
1654                                 cnode.c_rsrcfork = NULL;
1655                                 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1656                                 while (fsize > 0) {
1657                                     if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1658                                                 fsize -= HFS_BIGFILE_SIZE;
1659                                         } else {
1660                                                 fsize = 0;
1661                                         }
1662
1663                                         if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1664                                                                           cnode.c_attr.ca_fileid, false) != 0) {
1665                                                 printf("hfs: error truncating data fork!\n");
1666                                                 break;
1667                                         }
1668
1669                                         //
1670                                         // if we're iteratively truncating this file down,
1671                                         // then end the transaction and start a new one so
1672                                         // that no one transaction gets too big.
1673                                         //
1674                                         if (fsize > 0 && started_tr) {
1675                                                 /* Drop system file locks before starting
1676                                                  * another transaction to preserve lock order.
1677                                                  */
1678                                                 hfs_systemfile_unlock(hfsmp, lockflags);
1679                                                 catlock = 0;
1680                                                 hfs_end_transaction(hfsmp);
1681
1682                                                 if (hfs_start_transaction(hfsmp) != 0) {
1683                                                         started_tr = 0;
1684                                                         break;
1685                                                 }
1686                                                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1687                                                 catlock = 1;
1688                                         }
1689                                 }
1690                         }
1691
1692                         if (rfork.ff_blocks > 0) {
1693                                 rfork.ff_cp = &cnode;
1694                                 cnode.c_datafork = NULL;
1695                                 cnode.c_rsrcfork = &rfork;
1696                                 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1697                                         printf("hfs: error truncating rsrc fork!\n");
1698                                         break;
1699                                 }
1700                         }
1701
1702                         /* Remove the file or folder record from the Catalog */
1703                         if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1704                                 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1705                                 hfs_systemfile_unlock(hfsmp, lockflags);
1706                                 catlock = 0;
1707                                 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708                                 break;
1709                         }
1710
1711                         mode = cnode.c_attr.ca_mode & S_IFMT;
1712
1713                         if (mode == S_IFDIR) {
1714                                 orphaned_dirs++;
1715                         }
1716                         else {
1717                                 orphaned_files++;
1718                         }
1719
1720                         /* Update parent and volume counts */
1721                         hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1722                         if (mode == S_IFDIR) {
1723                                 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1724                         }
1725
1726                         (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1727                                          &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1728
1729                         /* Drop locks and end the transaction */
1730                         hfs_systemfile_unlock(hfsmp, lockflags);
1731                         cat_postflight(hfsmp, &cookie, p);
1732                         catlock = catreserve = 0;
1733
1734                         /*
1735                            Now that Catalog is unlocked, update the volume info, making
1736                            sure to differentiate between files and directories
1737                         */
1738                         if (mode == S_IFDIR) {
1739                                 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1740                         }
1741                         else{
1742                                 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1743                         }
1744
1745                         if (started_tr) {
1746                                 hfs_end_transaction(hfsmp);
1747                                 started_tr = 0;
1748                         }
1749
1750                 } /* end if */
1751         } /* end for */
1752         if (orphaned_files > 0 || orphaned_dirs > 0)
1753                 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1754 exit:
1755         if (catlock) {
1756                 hfs_systemfile_unlock(hfsmp, lockflags);
1757         }
1758         if (catreserve) {
1759                 cat_postflight(hfsmp, &cookie, p);
1760         }
1761         if (started_tr) {
1762                 hfs_end_transaction(hfsmp);
1763         }
1764
1765         FREE(iterator, M_TEMP);
1766         hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1767 }
1768
1769
1770 /*
1771  * This will return the correct logical block size for a given vnode.
1772  * For most files, it is the allocation block size, for meta data like
1773  * BTrees, this is kept as part of the BTree private nodeSize
1774  */
1775 u_int32_t
1776 GetLogicalBlockSize(struct vnode *vp)
1777 {
1778 u_int32_t logBlockSize;
1779
1780         DBG_ASSERT(vp != NULL);
1781
1782         /* start with default */
1783         logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1784
1785         if (vnode_issystem(vp)) {
1786                 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1787                         BTreeInfoRec                    bTreeInfo;
1788
1789                         /*
1790                          * We do not lock the BTrees, because if we are getting block..then the tree
1791                          * should be locked in the first place.
1792                          * We just want the nodeSize wich will NEVER change..so even if the world
1793                          * is changing..the nodeSize should remain the same. Which argues why lock
1794                          * it in the first place??
1795                          */
1796
1797                         (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1798
1799                         logBlockSize = bTreeInfo.nodeSize;
1800
1801                 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1802                                 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1803                 }
1804         }
1805
1806         DBG_ASSERT(logBlockSize > 0);
1807
1808         return logBlockSize;
1809 }
1810
1811 u_int32_t
1812 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
1813 {
1814         u_int32_t freeblks;
1815         u_int32_t rsrvblks;
1816         u_int32_t loanblks;
1817
1818         /*
1819          * We don't bother taking the mount lock
1820          * to look at these values since the values
1821          * themselves are each updated atomically
1822          * on aligned addresses.
1823          */
1824         freeblks = hfsmp->freeBlocks;
1825         rsrvblks = hfsmp->reserveBlocks;
1826         loanblks = hfsmp->loanedBlocks;
1827         if (wantreserve) {
1828                 if (freeblks > rsrvblks)
1829                         freeblks -= rsrvblks;
1830                 else
1831                         freeblks = 0;
1832         }
1833         if (freeblks > loanblks)
1834                 freeblks -= loanblks;
1835         else
1836                 freeblks = 0;
1837
1838 #if HFS_SPARSE_DEV
1839         /*
1840          * When the underlying device is sparse, check the
1841          * available space on the backing store volume.
1842          */
1843         if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
1844                 struct vfsstatfs *vfsp;  /* 272 bytes */
1845                 u_int64_t vfreeblks;
1846                 u_int32_t loanedblks;
1847                 struct mount * backingfs_mp;
1848                 struct timeval now;
1849
1850                 backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);
1851
1852                 microtime(&now);
1853                 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1854                     vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1855                     hfsmp->hfs_last_backingstatfs = now.tv_sec;
1856                 }
1857
1858                 if ((vfsp = vfs_statfs(backingfs_mp))) {
1859                         hfs_lock_mount (hfsmp);
1860                         vfreeblks = vfsp->f_bavail;
1861                         /* Normalize block count if needed. */
1862                         if (vfsp->f_bsize != hfsmp->blockSize) {
1863                                 vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
1864                         }
1865                         if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
1866                                 vfreeblks -= hfsmp->hfs_sparsebandblks;
1867                         else
1868                                 vfreeblks = 0;
1869
1870                         /* Take into account any delayed allocations. */
1871                         loanedblks = 2 * hfsmp->loanedBlocks;
1872                         if (vfreeblks > loanedblks)
1873                                 vfreeblks -= loanedblks;
1874                         else
1875                                 vfreeblks = 0;
1876
1877                         if (hfsmp->hfs_backingfs_maxblocks) {
1878                                 vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
1879                         }
1880                         freeblks = MIN(vfreeblks, freeblks);
1881                         hfs_unlock_mount (hfsmp);
1882                 }
1883         }
1884 #endif /* HFS_SPARSE_DEV */
1885         if (hfsmp->hfs_flags & HFS_CS) {
1886                 uint64_t cs_free_bytes;
1887                 uint64_t cs_free_blks;
1888                 if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
1889                     (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
1890                         cs_free_blks = cs_free_bytes / hfsmp->blockSize;
1891                         if (cs_free_blks > loanblks)
1892                                 cs_free_blks -= loanblks;
1893                         else
1894                                 cs_free_blks = 0;
1895                         freeblks = MIN(cs_free_blks, freeblks);
1896                 }
1897         }
1898
1899         return (freeblks);
1900 }
1901
1902 /*
1903  * Map HFS Common errors (negative) to BSD error codes (positive).
1904  * Positive errors (ie BSD errors) are passed through unchanged.
1905  */
1906 short MacToVFSError(OSErr err)
1907 {
1908         if (err >= 0)
1909                 return err;
1910
1911         /* BSD/VFS internal errnos */
1912         switch (err) {
1913                 case ERESERVEDNAME: /* -8 */
1914                         return err;
1915         }
1916
1917         switch (err) {
1918         case dskFulErr:                 /*    -34 */
1919         case btNoSpaceAvail:            /* -32733 */
1920                 return ENOSPC;
1921         case fxOvFlErr:                 /* -32750 */
1922                 return EOVERFLOW;
1923
1924         case btBadNode:                 /* -32731 */
1925                 return EIO;
1926
1927         case memFullErr:                /*  -108 */
1928                 return ENOMEM;          /*   +12 */
1929
1930         case cmExists:                  /* -32718 */
1931         case btExists:                  /* -32734 */
1932                 return EEXIST;          /*    +17 */
1933
1934         case cmNotFound:                /* -32719 */
1935         case btNotFound:                /* -32735 */
1936                 return ENOENT;          /*     28 */
1937
1938         case cmNotEmpty:                /* -32717 */
1939                 return ENOTEMPTY;       /*     66 */
1940
1941         case cmFThdDirErr:              /* -32714 */
1942                 return EISDIR;          /*     21 */
1943
1944         case fxRangeErr:                /* -32751 */
1945                 return ERANGE;
1946
1947         case bdNamErr:                  /*   -37 */
1948                 return ENAMETOOLONG;    /*    63 */
1949
1950         case paramErr:                  /*   -50 */
1951         case fileBoundsErr:             /* -1309 */
1952                 return EINVAL;          /*   +22 */
1953
1954         case fsBTBadNodeSize:
1955                 return ENXIO;
1956
1957         default:
1958                 return EIO;             /*   +5 */
1959         }
1960 }
1961
1962
1963 /*
1964  * Find the current thread's directory hint for a given index.
1965  *
1966  * Requires an exclusive lock on directory cnode.
1967  *
1968  * Use detach if the cnode lock must be dropped while the hint is still active.
1969  */
1970 __private_extern__
1971 directoryhint_t *
1972 hfs_getdirhint(struct cnode *dcp, int index, int detach)
1973 {
1974         struct timeval tv;
1975         directoryhint_t *hint;
1976         boolean_t need_remove, need_init;
1977         const u_int8_t * name;
1978
1979         microuptime(&tv);
1980
1981         /*
1982          *  Look for an existing hint first.  If not found, create a new one (when
1983          *  the list is not full) or recycle the oldest hint.  Since new hints are
1984          *  always added to the head of the list, the last hint is always the
1985          *  oldest.
1986          */
1987         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1988                 if (hint->dh_index == index)
1989                         break;
1990         }
1991         if (hint != NULL) { /* found an existing hint */
1992                 need_init = false;
1993                 need_remove = true;
1994         } else { /* cannot find an existing hint */
1995                 need_init = true;
1996                 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1997                         /* Create a default directory hint */
1998                         MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1999                         ++dcp->c_dirhintcnt;
2000                         need_remove = false;
2001                 } else {                                /* recycle the last (i.e., the oldest) hint */
2002                         hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2003                         if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2004                             (name = hint->dh_desc.cd_nameptr)) {
2005                                 hint->dh_desc.cd_nameptr = NULL;
2006                                 hint->dh_desc.cd_namelen = 0;
2007                                 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2008                                 vfs_removename((const char *)name);
2009                         }
2010                         need_remove = true;
2011                 }
2012         }
2013
2014         if (need_remove)
2015                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2016
2017         if (detach)
2018                 --dcp->c_dirhintcnt;
2019         else
2020                 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2021
2022         if (need_init) {
2023                 hint->dh_index = index;
2024                 hint->dh_desc.cd_flags = 0;
2025                 hint->dh_desc.cd_encoding = 0;
2026                 hint->dh_desc.cd_namelen = 0;
2027                 hint->dh_desc.cd_nameptr = NULL;
2028                 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2029                 hint->dh_desc.cd_hint = dcp->c_childhint;
2030                 hint->dh_desc.cd_cnid = 0;
2031         }
2032         hint->dh_time = tv.tv_sec;
2033         return (hint);
2034 }
2035
2036 /*
2037  * Release a single directory hint.
2038  *
2039  * Requires an exclusive lock on directory cnode.
2040  */
2041 __private_extern__
2042 void
2043 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2044 {
2045         const u_int8_t * name;
2046         directoryhint_t *hint;
2047
2048         /* Check if item is on list (could be detached) */
2049         TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2050                 if (hint == relhint) {
2051                         TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2052                         --dcp->c_dirhintcnt;
2053                         break;
2054                 }
2055         }
2056         name = relhint->dh_desc.cd_nameptr;
2057         if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2058                 relhint->dh_desc.cd_nameptr = NULL;
2059                 relhint->dh_desc.cd_namelen = 0;
2060                 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2061                 vfs_removename((const char *)name);
2062         }
2063         FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2064 }
2065
2066 /*
2067  * Release directory hints for given directory
2068  *
2069  * Requires an exclusive lock on directory cnode.
2070  */
2071 __private_extern__
2072 void
2073 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2074 {
2075         struct timeval tv;
2076         directoryhint_t *hint, *prev;
2077         const u_int8_t * name;
2078
2079         if (stale_hints_only)
2080                 microuptime(&tv);
2081
2082         /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2083         for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2084                 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2085                         break;  /* stop here if this entry is too new */
2086                 name = hint->dh_desc.cd_nameptr;
2087                 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2088                         hint->dh_desc.cd_nameptr = NULL;
2089                         hint->dh_desc.cd_namelen = 0;
2090                         hint->dh_desc.cd_flags &= ~CD_HASBUF;
2091                         vfs_removename((const char *)name);
2092                 }
2093                 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2094                 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2095                 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2096                 --dcp->c_dirhintcnt;
2097         }
2098 }
2099
2100 /*
2101  * Insert a detached directory hint back into the list of dirhints.
2102  *
2103  * Requires an exclusive lock on directory cnode.
2104  */
2105 __private_extern__
2106 void
2107 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2108 {
2109         directoryhint_t *test;
2110
2111         TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2112                 if (test == hint)
2113                         panic("hfs_insertdirhint: hint %p already on list!", hint);
2114         }
2115
2116         TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2117         ++dcp->c_dirhintcnt;
2118 }
2119
2120 /*
2121  * Perform a case-insensitive compare of two UTF-8 filenames.
2122  *
2123  * Returns 0 if the strings match.
2124  */
2125 __private_extern__
2126 int
2127 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2128 {
2129         u_int16_t *ustr1, *ustr2;
2130         size_t ulen1, ulen2;
2131         size_t maxbytes;
2132         int cmp = -1;
2133
2134         if (len1 != len2)
2135                 return (cmp);
2136
2137         maxbytes = kHFSPlusMaxFileNameChars << 1;
2138         MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2139         ustr2 = ustr1 + (maxbytes >> 1);
2140
2141         if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2142                 goto out;
2143         if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2144                 goto out;
2145
2146         cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2147 out:
2148         FREE(ustr1, M_TEMP);
2149         return (cmp);
2150 }
2151
2152
/*
 * Argument block passed from open_journal_dev() to journal_open_cb()
 * through the media iterator.
 */
struct jopen_cb_info {
	off_t		 jsize;		/* journal size handed to journal_is_clean() */
	char		*desired_uuid;	/* uuid to match; when empty, any device is accepted
					 * and the chosen device's uuid is copied back here */
	struct vnode	*jvp;		/* vnode of the accepted journal device, else NULL */
	size_t		 blksize;	/* block size handed to journal_is_clean() */
	int		 need_clean;	/* non-zero: check candidates with journal_is_clean()
					 * when no specific uuid was requested */
	int		 need_init;	/* set to 1 when journal_is_clean() returns EINVAL */
};
typedef struct jopen_cb_info jopen_cb_info;
2161
2162 static int
2163 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2164 {
2165         struct nameidata nd;
2166         jopen_cb_info *ji = (jopen_cb_info *)arg;
2167         char bsd_name[256];
2168         int error;
2169
2170         strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2171         strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2172
2173         if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2174                 return 1;   // keep iterating
2175         }
2176
2177         // if we're here, either the desired uuid matched or there was no
2178         // desired uuid so let's try to open the device for writing and
2179         // see if it works.  if it does, we'll use it.
2180
2181         NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2182         if ((error = namei(&nd))) {
2183                 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2184                 return 1;   // keep iterating
2185         }
2186
2187         ji->jvp = nd.ni_vp;
2188         nameidone(&nd);
2189
2190         if (ji->jvp == NULL) {
2191                 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2192         } else {
2193                 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2194                 if (error == 0) {
2195                         // if the journal is dirty and we didn't specify a desired
2196                         // journal device uuid, then do not use the journal.  but
2197                         // if the journal is just invalid (e.g. it hasn't been
2198                         // initialized) then just set the need_init flag.
2199                         if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2200                                 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2201                                 if (error == EBUSY) {
2202                                         VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2203                                         vnode_put(ji->jvp);
2204                                         ji->jvp = NULL;
2205                                         return 1;    // keep iterating
2206                                 } else if (error == EINVAL) {
2207                                         ji->need_init = 1;
2208                                 }
2209                         }
2210
2211                         if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2212                                 strlcpy(ji->desired_uuid, uuid_str, 128);
2213                         }
2214                         vnode_setmountedon(ji->jvp);
2215                         return 0;   // stop iterating
2216                 } else {
2217                         vnode_put(ji->jvp);
2218                         ji->jvp = NULL;
2219                 }
2220         }
2221
2222         return 1;   // keep iterating
2223 }
2224
2225 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2226 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2227
2228
2229 static vnode_t
2230 open_journal_dev(const char *vol_device,
2231                  int need_clean,
2232                  char *uuid_str,
2233                  char *machine_serial_num,
2234                  off_t jsize,
2235                  size_t blksize,
2236                  int *need_init)
2237 {
2238     int retry_counter=0;
2239     jopen_cb_info ji;
2240
2241     ji.jsize        = jsize;
2242     ji.desired_uuid = uuid_str;
2243     ji.jvp          = NULL;
2244     ji.blksize      = blksize;
2245     ji.need_clean   = need_clean;
2246     ji.need_init    = 0;
2247
2248 //    if (uuid_str[0] == '\0') {
2249 //          printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2250 //    } else {
2251 //          printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2252 //    }
2253     while (ji.jvp == NULL && retry_counter++ < 4) {
2254             if (retry_counter > 1) {
2255                     if (uuid_str[0]) {
2256                             printf("hfs: open_journal_dev: uuid %s not found.  waiting 10sec.\n", uuid_str);
2257                     } else {
2258                             printf("hfs: open_journal_dev: no available external journal partition found.  waiting 10sec.\n");
2259                     }
2260                     delay_for_interval(10* 1000000, NSEC_PER_USEC);    // wait for ten seconds and then try again
2261             }
2262
2263             IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2264     }
2265
2266     if (ji.jvp == NULL) {
2267             printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2268                    vol_device, uuid_str, machine_serial_num);
2269     }
2270
2271     *need_init = ji.need_init;
2272
2273     return ji.jvp;
2274 }
2275
2276
2277 int
2278 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2279                                            void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2280                                            HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2281 {
2282         JournalInfoBlock *jibp;
2283         struct buf       *jinfo_bp, *bp;
2284         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2285         int               retval, write_jibp = 0;
2286         uint32_t                  blksize = hfsmp->hfs_logical_block_size;
2287         struct vnode     *devvp;
2288         struct hfs_mount_args *args = _args;
2289         u_int32_t         jib_flags;
2290         u_int64_t         jib_offset;
2291         u_int64_t         jib_size;
2292         const char *dev_name;
2293
2294         devvp = hfsmp->hfs_devvp;
2295         dev_name = vnode_getname_printable(devvp);
2296
2297         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2298                 arg_flags  = args->journal_flags;
2299                 arg_tbufsz = args->journal_tbuffer_size;
2300         }
2301
2302         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2303
2304         jinfo_bp = NULL;
2305         retval = (int)buf_meta_bread(devvp,
2306                                                 (daddr64_t)((embeddedOffset/blksize) +
2307                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2308                                                 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2309         if (retval) {
2310                 if (jinfo_bp) {
2311                         buf_brelse(jinfo_bp);
2312                 }
2313                 goto cleanup_dev_name;
2314         }
2315
2316         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2317         jib_flags  = SWAP_BE32(jibp->flags);
2318         jib_size   = SWAP_BE64(jibp->size);
2319
2320         if (jib_flags & kJIJournalInFSMask) {
2321                 hfsmp->jvp = hfsmp->hfs_devvp;
2322                 jib_offset = SWAP_BE64(jibp->offset);
2323         } else {
2324             int need_init=0;
2325
2326             // if the volume was unmounted cleanly then we'll pick any
2327             // available external journal partition
2328             //
2329             if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2330                     *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2331             }
2332
2333             hfsmp->jvp = open_journal_dev(dev_name,
2334                                           !(jib_flags & kJIJournalNeedInitMask),
2335                                           (char *)&jibp->ext_jnl_uuid[0],
2336                                           (char *)&jibp->machine_serial_num[0],
2337                                           jib_size,
2338                                           hfsmp->hfs_logical_block_size,
2339                                           &need_init);
2340             if (hfsmp->jvp == NULL) {
2341                     buf_brelse(jinfo_bp);
2342                     retval = EROFS;
2343                     goto cleanup_dev_name;
2344             } else {
2345                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2346                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2347                     }
2348             }
2349
2350             jib_offset = 0;
2351             write_jibp = 1;
2352             if (need_init) {
2353                     jib_flags |= kJIJournalNeedInitMask;
2354             }
2355         }
2356
2357         // save this off for the hack-y check in hfs_remove()
2358         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2359         hfsmp->jnl_size  = jib_size;
2360
2361         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2362             // if the file system is read-only, check if the journal is empty.
2363             // if it is, then we can allow the mount.  otherwise we have to
2364             // return failure.
2365             retval = journal_is_clean(hfsmp->jvp,
2366                                       jib_offset + embeddedOffset,
2367                                       jib_size,
2368                                       devvp,
2369                                       hfsmp->hfs_logical_block_size);
2370
2371             hfsmp->jnl = NULL;
2372
2373             buf_brelse(jinfo_bp);
2374
2375             if (retval) {
2376                     const char *name = vnode_getname_printable(devvp);
2377                     printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2378                     name);
2379                     vnode_putname_printable(name);
2380             }
2381
2382             goto cleanup_dev_name;
2383         }
2384
2385         if (jib_flags & kJIJournalNeedInitMask) {
2386                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2387                            jib_offset + embeddedOffset, jib_size);
2388                 hfsmp->jnl = journal_create(hfsmp->jvp,
2389                                                                         jib_offset + embeddedOffset,
2390                                                                         jib_size,
2391                                                                         devvp,
2392                                                                         blksize,
2393                                                                         arg_flags,
2394                                                                         arg_tbufsz,
2395                                                                         hfs_sync_metadata, hfsmp->hfs_mp,
2396                                                                         hfsmp->hfs_mp);
2397                 if (hfsmp->jnl)
2398                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2399
2400                 // no need to start a transaction here... if this were to fail
2401                 // we'd just re-init it on the next mount.
2402                 jib_flags &= ~kJIJournalNeedInitMask;
2403                 jibp->flags  = SWAP_BE32(jib_flags);
2404                 buf_bwrite(jinfo_bp);
2405                 jinfo_bp = NULL;
2406                 jibp     = NULL;
2407         } else {
2408                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2409                 //         jib_offset + embeddedOffset,
2410                 //         jib_size, SWAP_BE32(vhp->blockSize));
2411
2412                 hfsmp->jnl = journal_open(hfsmp->jvp,
2413                                                                   jib_offset + embeddedOffset,
2414                                                                   jib_size,
2415                                                                   devvp,
2416                                                                   blksize,
2417                                                                   arg_flags,
2418                                                                   arg_tbufsz,
2419                                                                   hfs_sync_metadata, hfsmp->hfs_mp,
2420                                                                   hfsmp->hfs_mp);
2421                 if (hfsmp->jnl)
2422                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2423
2424                 if (write_jibp) {
2425                         buf_bwrite(jinfo_bp);
2426                 } else {
2427                         buf_brelse(jinfo_bp);
2428                 }
2429                 jinfo_bp = NULL;
2430                 jibp     = NULL;
2431
2432                 if (hfsmp->jnl && mdbp) {
2433                         // reload the mdb because it could have changed
2434                         // if the journal had to be replayed.
2435                         if (mdb_offset == 0) {
2436                                 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2437                         }
2438                         bp = NULL;
2439                         retval = (int)buf_meta_bread(devvp,
2440                                         HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2441                                         hfsmp->hfs_physical_block_size, cred, &bp);
2442                         if (retval) {
2443                                 if (bp) {
2444                                         buf_brelse(bp);
2445                                 }
2446                                 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2447                                            retval);
2448                                 goto cleanup_dev_name;
2449                         }
2450                         bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2451                         buf_brelse(bp);
2452                         bp = NULL;
2453                 }
2454         }
2455
2456         // if we expected the journal to be there and we couldn't
2457         // create it or open it then we have to bail out.
2458         if (hfsmp->jnl == NULL) {
2459                 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2460                 retval = EINVAL;
2461                 goto cleanup_dev_name;
2462         }
2463
2464         retval = 0;
2465
2466 cleanup_dev_name:
2467         vnode_putname_printable(dev_name);
2468         return retval;
2469 }
2470
2471
2472 //
2473 // This function will go and re-locate the .journal_info_block and
2474 // the .journal files in case they moved (which can happen if you
2475 // run Norton SpeedDisk).  If we fail to find either file we just
2476 // disable journaling for this volume and return.  We turn off the
2477 // journaling bit in the vcb and assume it will get written to disk
2478 // later (if it doesn't on the next mount we'd do the same thing
2479 // again which is harmless).  If we disable journaling we don't
2480 // return an error so that the volume is still mountable.
2481 //
2482 // If the info we find for the .journal_info_block and .journal files
2483 // isn't what we had stored, we re-set our cached info and proceed
2484 // with opening the journal normally.
2485 //
2486 static int
2487 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2488 {
2489         JournalInfoBlock *jibp;
2490         struct buf       *jinfo_bp;
2491         int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2492         int               retval, write_jibp = 0, recreate_journal = 0;
2493         struct vnode     *devvp;
2494         struct cat_attr   jib_attr, jattr;
2495         struct cat_fork   jib_fork, jfork;
2496         ExtendedVCB      *vcb;
2497         u_int32_t            fid;
2498         struct hfs_mount_args *args = _args;
2499         u_int32_t         jib_flags;
2500         u_int64_t         jib_offset;
2501         u_int64_t         jib_size;
2502
2503         devvp = hfsmp->hfs_devvp;
2504         vcb = HFSTOVCB(hfsmp);
2505
2506         if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2507                 if (args->journal_disable) {
2508                         return 0;
2509                 }
2510
2511                 arg_flags  = args->journal_flags;
2512                 arg_tbufsz = args->journal_tbuffer_size;
2513         }
2514
2515         fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2516         if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2517                 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2518                            jib_fork.cf_extents[0].startBlock);
2519                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2520                 return 0;
2521         }
2522         hfsmp->hfs_jnlinfoblkid = fid;
2523
2524         // make sure the journal_info_block begins where we think it should.
2525         if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2526                 printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
2527                            SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2528
2529                 vcb->vcbJinfoBlock    = jib_fork.cf_extents[0].startBlock;
2530                 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2531                 recreate_journal = 1;
2532         }
2533
2534
2535         sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2536         jinfo_bp = NULL;
2537         retval = (int)buf_meta_bread(devvp,
2538                                                 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2539                                                 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2540                                                 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2541         if (retval) {
2542                 if (jinfo_bp) {
2543                         buf_brelse(jinfo_bp);
2544                 }
2545                 printf("hfs: can't read journal info block. disabling journaling.\n");
2546                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2547                 return 0;
2548         }
2549
2550         jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2551         jib_flags  = SWAP_BE32(jibp->flags);
2552         jib_offset = SWAP_BE64(jibp->offset);
2553         jib_size   = SWAP_BE64(jibp->size);
2554
2555         fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2556         if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2557                 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2558                            jfork.cf_extents[0].startBlock);
2559                 buf_brelse(jinfo_bp);
2560                 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2561                 return 0;
2562         }
2563         hfsmp->hfs_jnlfileid = fid;
2564
2565         // make sure the journal file begins where we think it should.
2566         if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2567                 printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
2568                            (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2569
2570                 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2571                 write_jibp   = 1;
2572                 recreate_journal = 1;
2573         }
2574
2575         // check the size of the journal file.
2576         if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2577                 printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
2578                            jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2579
2580                 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2581                 write_jibp = 1;
2582                 recreate_journal = 1;
2583         }
2584
2585         if (jib_flags & kJIJournalInFSMask) {
2586                 hfsmp->jvp = hfsmp->hfs_devvp;
2587                 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2588         } else {
2589             const char *dev_name;
2590             int need_init = 0;
2591
2592             dev_name = vnode_getname_printable(devvp);
2593
2594             // since the journal is empty, just use any available external journal
2595             *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2596
2597             // this fills in the uuid of the device we actually get
2598             hfsmp->jvp = open_journal_dev(dev_name,
2599                                           !(jib_flags & kJIJournalNeedInitMask),
2600                                           (char *)&jibp->ext_jnl_uuid[0],
2601                                           (char *)&jibp->machine_serial_num[0],
2602                                           jib_size,
2603                                           hfsmp->hfs_logical_block_size,
2604                                           &need_init);
2605             if (hfsmp->jvp == NULL) {
2606                     buf_brelse(jinfo_bp);
2607                     vnode_putname_printable(dev_name);
2608                     return EROFS;
2609             } else {
2610                     if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2611                             strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2612                     }
2613             }
2614             jib_offset = 0;
2615             recreate_journal = 1;
2616             write_jibp = 1;
2617             if (need_init) {
2618                     jib_flags |= kJIJournalNeedInitMask;
2619             }
2620             vnode_putname_printable(dev_name);
2621         }
2622
2623         // save this off for the hack-y check in hfs_remove()
2624         hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2625         hfsmp->jnl_size  = jib_size;
2626
2627         if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2628             // if the file system is read-only, check if the journal is empty.
2629             // if it is, then we can allow the mount.  otherwise we have to
2630             // return failure.
2631             retval = journal_is_clean(hfsmp->jvp,
2632                                       jib_offset,
2633                                       jib_size,
2634                                       devvp,
2635                                       hfsmp->hfs_logical_block_size);
2636
2637             hfsmp->jnl = NULL;
2638
2639             buf_brelse(jinfo_bp);
2640
2641             if (retval) {
2642                     const char *name = vnode_getname_printable(devvp);
2643                     printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
2644                     name);
2645                     vnode_putname_printable(name);
2646             }
2647
2648             return retval;
2649         }
2650
2651         if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2652                 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2653                            jib_offset, jib_size);
2654                 hfsmp->jnl = journal_create(hfsmp->jvp,
2655                                                                         jib_offset,
2656                                                                         jib_size,
2657                                                                         devvp,
2658                                                                         hfsmp->hfs_logical_block_size,
2659                                                                         arg_flags,
2660                                                                         arg_tbufsz,
2661                                                                         hfs_sync_metadata, hfsmp->hfs_mp,
2662                                                                         hfsmp->hfs_mp);
2663                 if (hfsmp->jnl)
2664                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2665
2666                 // no need to start a transaction here... if this were to fail
2667                 // we'd just re-init it on the next mount.
2668                 jib_flags &= ~kJIJournalNeedInitMask;
2669                 write_jibp   = 1;
2670
2671         } else {
2672                 //
2673                 // if we weren't the last person to mount this volume
2674                 // then we need to throw away the journal because it
2675                 // is likely that someone else mucked with the disk.
2676                 // if the journal is empty this is no big deal.  if the
2677                 // disk is dirty this prevents us from replaying the
2678                 // journal over top of changes that someone else made.
2679                 //
2680                 arg_flags |= JOURNAL_RESET;
2681
2682                 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2683                 //         jib_offset,
2684                 //         jib_size, SWAP_BE32(vhp->blockSize));
2685
2686                 hfsmp->jnl = journal_open(hfsmp->jvp,
2687                                                                   jib_offset,
2688                                                                   jib_size,
2689                                                                   devvp,
2690                                                                   hfsmp->hfs_logical_block_size,
2691                                                                   arg_flags,
2692                                                                   arg_tbufsz,
2693                                                                   hfs_sync_metadata, hfsmp->hfs_mp,
2694                                                                   hfsmp->hfs_mp);
2695                 if (hfsmp->jnl)
2696                         journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2697         }
2698
2699
2700         if (write_jibp) {
2701                 jibp->flags  = SWAP_BE32(jib_flags);
2702                 jibp->offset = SWAP_BE64(jib_offset);
2703                 jibp->size   = SWAP_BE64(jib_size);
2704
2705                 buf_bwrite(jinfo_bp);
2706         } else {
2707                 buf_brelse(jinfo_bp);
2708         }
2709         jinfo_bp = NULL;
2710         jibp     = NULL;
2711
2712         // if we expected the journal to be there and we couldn't
2713         // create it or open it then we have to bail out.
2714         if (hfsmp->jnl == NULL) {
2715                 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2716                 return EINVAL;
2717         }
2718
2719         return 0;
2720 }
2721
2722 /*
2723  * Calculate the allocation zone for metadata.
2724  *
2725  * This zone includes the following:
2726  *      Allocation Bitmap file
2727  *      Overflow Extents file
2728  *      Journal file
2729  *      Quota files
2730  *      Clustered Hot files
2731  *      Catalog file
2732  *
2733  *                          METADATA ALLOCATION ZONE
2734  * ____________________________________________________________________________
2735  * |    |    |     |               |                              |           |
2736  * | BM | JF | OEF |    CATALOG    |--->                          | HOT FILES |
2737  * |____|____|_____|_______________|______________________________|___________|
2738  *
2739  * <------------------------------- N * 128 MB ------------------------------->
2740  *
2741  */
/*
 * One gigabyte (2^30 bytes) as a 64-bit quantity.  The expansion is fully
 * parenthesized so the cast cannot be split from its operand by whatever
 * operator surrounds the macro at the point of use.
 */
#define GIGABYTE  ((u_int64_t)(1024*1024*1024))

/*
 * Byte-size limits.  The HOTBAND_* pair clamps the hot file region computed
 * in hfs_metadatazone_init().
 */
#define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
#define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
#define JOURNAL_DEFAULT_SIZE  (8*1024*1024)
#define JOURNAL_MAXIMUM_SIZE  (512*1024*1024)
#define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
#define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
2750
2751 /* Initialize the metadata zone.
2752  *
2753  * If the size of  the volume is less than the minimum size for
2754  * metadata zone, metadata zone is disabled.
2755  *
2756  * If disable is true, disable metadata zone unconditionally.
2757  */
2758 void
2759 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2760 {
2761         ExtendedVCB  *vcb;
2762         u_int64_t  fs_size;
2763         u_int64_t  zonesize;
2764         u_int64_t  temp;
2765         u_int64_t  filesize;
2766         u_int32_t  blk;
2767         int  items, really_do_it=1;
2768
2769         vcb = HFSTOVCB(hfsmp);
2770         fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2771
2772         /*
2773          * For volumes less than 10 GB, don't bother.
2774          */
2775         if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2776                 really_do_it = 0;
2777         }
2778
2779         /*
2780          * Skip non-journaled volumes as well.
2781          */
2782         if (hfsmp->jnl == NULL) {
2783                 really_do_it = 0;
2784         }
2785
2786         /* If caller wants to disable metadata zone, do it */
2787         if (disable == true) {
2788                 really_do_it = 0;
2789         }
2790
2791         /*
2792          * Start with space for the boot blocks and Volume Header.
2793          * 1536 = byte offset from start of volume to end of volume header:
2794          * 1024 bytes is the offset from the start of the volume to the
2795          * start of the volume header (defined by the volume format)
2796          * + 512 bytes (the size of the volume header).
2797          */
2798         zonesize = roundup(1536, hfsmp->blockSize);
2799
2800         /*
2801          * Add the on-disk size of allocation bitmap.
2802          */
2803         zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2804
2805         /*
2806          * Add space for the Journal Info Block and Journal (if they're in
2807          * this file system).
2808          */
2809         if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2810                 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2811         }
2812
2813         /*
2814          * Add the existing size of the Extents Overflow B-tree.
2815          * (It rarely grows, so don't bother reserving additional room for it.)
2816          */
2817         zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2818
2819         /*
2820          * If there is an Attributes B-tree, leave room for 11 clumps worth.
2821          * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2822          * When installing a full OS install onto a 20GB volume, we use
2823          * 7 to 8 clumps worth of space (depending on packages), so that leaves
2824          * us with another 3 or 4 clumps worth before we need another extent.
2825          */
2826         if (hfsmp->hfs_attribute_cp) {
2827                 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2828         }
2829
2830         /*
2831          * Leave room for 11 clumps of the Catalog B-tree.
2832          * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2833          * When installing a full OS install onto a 20GB volume, we use
2834          * 7 to 8 clumps worth of space (depending on packages), so that leaves
2835          * us with another 3 or 4 clumps worth before we need another extent.
2836          */
2837         zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2838
2839         /*
2840          * Add space for hot file region.
2841          *
2842          * ...for now, use 5 MB per 1 GB (0.5 %)
2843          */
2844         filesize = (fs_size / 1024) * 5;
2845         if (filesize > HOTBAND_MAXIMUM_SIZE)
2846                 filesize = HOTBAND_MAXIMUM_SIZE;
2847         else if (filesize < HOTBAND_MINIMUM_SIZE)
2848                 filesize = HOTBAND_MINIMUM_SIZE;
2849         /*
2850          * Calculate user quota file requirements.
2851          */
2852         if (hfsmp->hfs_flags & HFS_QUOTAS) {
2853                 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2854                 if (items < QF_MIN_USERS)
2855                         items = QF_MIN_USERS;
2856                 else if (items > QF_MAX_USERS)
2857                         items = QF_MAX_USERS;
2858                 if (!powerof2(items)) {
2859                         int x = items;
2860                         items = 4;
2861                         while (x>>1 != 1) {
2862                                 x = x >> 1;
2863                                 items = items << 1;
2864                         }
2865                 }
2866                 filesize += (items + 1) * sizeof(struct dqblk);
2867                 /*
2868                  * Calculate group quota file requirements.
2869                  *
2870                  */
2871                 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2872                 if (items < QF_MIN_GROUPS)
2873                         items = QF_MIN_GROUPS;
2874                 else if (items > QF_MAX_GROUPS)
2875                         items = QF_MAX_GROUPS;
2876                 if (!powerof2(items)) {
2877                         int x = items;
2878                         items = 4;
2879                         while (x>>1 != 1) {
2880                                 x = x >> 1;
2881                                 items = items << 1;
2882                         }
2883                 }
2884                 filesize += (items + 1) * sizeof(struct dqblk);
2885         }
2886         zonesize += filesize;
2887
2888         /*
2889          * Round up entire zone to a bitmap block's worth.
2890          * The extra space goes to the catalog file and hot file area.
2891          */
2892         temp = zonesize;
2893         zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2894         hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2895         /*
2896          * If doing the round up for hfs_min_alloc_start would push us past
2897          * allocLimit, then just reset it back to 0.  Though using a value
2898          * bigger than allocLimit would not cause damage in the block allocator
2899          * code, this value could get stored in the volume header and make it out
2900          * to disk, making the volume header technically corrupt.
2901          */
2902         if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2903                 hfsmp->hfs_min_alloc_start = 0;
2904         }
2905
2906         if (really_do_it == 0) {
2907                 /* If metadata zone needs to be disabled because the
2908                  * volume was truncated, clear the bit and zero out
2909                  * the values that are no longer needed.
2910                  */
2911                 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2912                         /* Disable metadata zone */
2913                         hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2914
2915                         /* Zero out mount point values that are not required */
2916                         hfsmp->hfs_catalog_maxblks = 0;
2917                         hfsmp->hfs_hotfile_maxblks = 0;
2918                         hfsmp->hfs_hotfile_start = 0;
2919                         hfsmp->hfs_hotfile_end = 0;
2920                         hfsmp->hfs_hotfile_freeblks = 0;
2921                         hfsmp->hfs_metazone_start = 0;
2922                         hfsmp->hfs_metazone_end = 0;
2923                 }
2924
2925                 return;
2926         }
2927
2928         temp = zonesize - temp;  /* temp has extra space */
2929         filesize += temp / 3;
2930         hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2931
2932         hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2933
2934         /* Convert to allocation blocks. */
2935         blk = zonesize / vcb->blockSize;
2936
2937         /* The default metadata zone location is at the start of volume. */
2938         hfsmp->hfs_metazone_start = 1;
2939         hfsmp->hfs_metazone_end = blk - 1;
2940
2941         /* The default hotfile area is at the end of the zone. */
2942         if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
2943                 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2944                 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2945                 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2946         }
2947         else {
2948                 hfsmp->hfs_hotfile_start = 0;
2949                 hfsmp->hfs_hotfile_end = 0;
2950                 hfsmp->hfs_hotfile_freeblks = 0;
2951         }
2952 #if 0
2953         printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2954         printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2955         printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2956 #endif
2957         hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2958 }
2959
2960
2961 static u_int32_t
2962 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2963 {
2964         ExtendedVCB  *vcb = HFSTOVCB(hfsmp);
2965         int  lockflags;
2966         int  freeblocks;
2967
2968         lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2969         freeblocks = MetaZoneFreeBlocks(vcb);
2970         hfs_systemfile_unlock(hfsmp, lockflags);
2971
2972         /* Minus Extents overflow file reserve. */
2973         freeblocks -=
2974                 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2975         /* Minus catalog file reserve. */
2976         freeblocks -=
2977                 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2978         if (freeblocks < 0)
2979                 freeblocks = 0;
2980
2981         return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2982 }
2983
2984 /*
2985  * Determine if a file is a "virtual" metadata file.
2986  * This includes journal and quota files.
2987  */
2988 int
2989 hfs_virtualmetafile(struct cnode *cp)
2990 {
2991         const char * filename;
2992
2993
2994         if (cp->c_parentcnid != kHFSRootFolderID)
2995                 return (0);
2996
2997         filename = (const char *)cp->c_desc.cd_nameptr;
2998         if (filename == NULL)
2999                 return (0);
3000
3001         if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3002             (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3003             (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3004             (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3005             (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3006                 return (1);
3007
3008         return (0);
3009 }
3010
3011 __private_extern__
3012 void hfs_syncer_lock(struct hfsmount *hfsmp)
3013 {
3014     hfs_lock_mount(hfsmp);
3015 }
3016
3017 __private_extern__
3018 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3019 {
3020     hfs_unlock_mount(hfsmp);
3021 }
3022
3023 __private_extern__
3024 void hfs_syncer_wait(struct hfsmount *hfsmp)
3025 {
3026     msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
3027            "hfs_syncer_wait", NULL);
3028 }
3029
3030 __private_extern__
3031 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3032 {
3033     wakeup(&hfsmp->hfs_sync_incomplete);
3034 }
3035
3036 __private_extern__
3037 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3038 {
3039     uint64_t deadline;
3040     clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3041     return deadline;
3042 }
3043
3044 __private_extern__
3045 void hfs_syncer_queue(thread_call_t syncer)
3046 {
3047     if (thread_call_enter_delayed_with_leeway(syncer,
3048                                               NULL,
3049                                               hfs_usecs_to_deadline(HFS_META_DELAY),
3050                                               0,
3051                                               THREAD_CALL_DELAY_SYS_BACKGROUND)) {
3052         printf ("hfs: syncer already scheduled!");
3053     }
3054 }
3055
3056 //
3057 // Fire off a timed callback to sync the disk if the
3058 // volume is on ejectable media.
3059 //
3060  __private_extern__
3061 void
3062 hfs_sync_ejectable(struct hfsmount *hfsmp)
3063 {
3064     // If we don't have a syncer or we get called by the syncer, just return
3065     if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
3066         return;
3067
3068     hfs_syncer_lock(hfsmp);
3069
3070     if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3071         microuptime(&hfsmp->hfs_sync_req_oldest);
3072
3073     /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3074        don't want to queue again if there is a sync outstanding. */
3075     if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
3076         hfs_syncer_unlock(hfsmp);
3077         return;
3078     }
3079
3080     hfsmp->hfs_sync_incomplete = TRUE;
3081
3082     thread_call_t syncer = hfsmp->hfs_syncer;
3083
3084     hfs_syncer_unlock(hfsmp);
3085
3086     hfs_syncer_queue(syncer);
3087 }
3088
3089 int
3090 hfs_start_transaction(struct hfsmount *hfsmp)
3091 {
3092         int ret, unlock_on_err=0;
3093         void * thread = current_thread();
3094
3095 #ifdef HFS_CHECK_LOCK_ORDER
3096         /*
3097          * You cannot start a transaction while holding a system
3098          * file lock. (unless the transaction is nested.)
3099          */
3100         if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3101                 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3102                         panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3103                 }
3104                 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3105                         panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3106                 }
3107                 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3108                         panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3109                 }
3110         }
3111 #endif /* HFS_CHECK_LOCK_ORDER */
3112
3113         if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
3114                 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3115                 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3116                 unlock_on_err = 1;
3117         }
3118
3119         /* If a downgrade to read-only mount is in progress, no other
3120          * process than the downgrade process is allowed to modify
3121          * the file system.
3122          */
3123         if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3124                         (hfsmp->hfs_downgrading_proc != thread)) {
3125                 ret = EROFS;
3126                 goto out;
3127         }
3128
3129         if (hfsmp->jnl) {
3130                 ret = journal_start_transaction(hfsmp->jnl);
3131                 if (ret == 0) {
3132                         OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
3133                 }
3134         } else {
3135                 ret = 0;
3136         }
3137
3138 out:
3139         if (ret != 0 && unlock_on_err) {
3140                 hfs_unlock_global (hfsmp);
3141                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3142         }
3143
3144     return ret;
3145 }
3146
3147 int
3148 hfs_end_transaction(struct hfsmount *hfsmp)
3149 {
3150     int need_unlock=0, ret;
3151
3152     if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
3153             && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
3154             need_unlock = 1;
3155     }
3156
3157         if (hfsmp->jnl) {
3158                 ret = journal_end_transaction(hfsmp->jnl);
3159         } else {
3160                 ret = 0;
3161         }
3162
3163         if (need_unlock) {
3164                 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3165                 hfs_unlock_global (hfsmp);
3166                 hfs_sync_ejectable(hfsmp);
3167         }
3168
3169     return ret;
3170 }
3171
3172
3173 /*
3174  * Flush the contents of the journal to the disk.
3175  *
3176  *  Input:
3177  *      wait_for_IO -
3178  *      If TRUE, wait to write in-memory journal to the disk
3179  *      consistently, and also wait to write all asynchronous
3180  *      metadata blocks to its corresponding locations
3181  *      consistently on the disk.  This means that the journal
3182  *      is empty at this point and does not contain any
3183  *      transactions.  This is overkill in normal scenarios
3184  *      but is useful whenever the metadata blocks are required
3185  *      to be consistent on-disk instead of just the journal
3186  *      being consistent; like before live verification
3187  *      and live volume resizing.
3188  *
3189  *      If FALSE, only wait to write in-memory journal to the
3190  *      disk consistently.  This means that the journal still
3191  *      contains uncommitted transactions and the file system
3192  *      metadata blocks in the journal transactions might be
3193  *      written asynchronously to the disk.  But there is no
3194  *      guarantee that they are written to the disk before
3195  *      returning to the caller.  Note that this option is
3196  *      sufficient for file system data integrity as it
3197  *      guarantees consistent journal content on the disk.
3198  */
3199 int
3200 hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3201 {
3202         int ret;
3203
3204         /* Only peek at hfsmp->jnl while holding the global lock */
3205         hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3206         if (hfsmp->jnl) {
3207                 ret = journal_flush(hfsmp->jnl, wait_for_IO);
3208         } else {
3209                 ret = 0;
3210         }
3211         hfs_unlock_global (hfsmp);
3212
3213         return ret;
3214 }
3215
3216
3217 /*
3218  * hfs_erase_unused_nodes
3219  *
3220  * Check wheter a volume may suffer from unused Catalog B-tree nodes that
3221  * are not zeroed (due to <rdar://problem/6947811>).  If so, just write
3222  * zeroes to the unused nodes.
3223  *
3224  * How do we detect when a volume needs this repair?  We can't always be
3225  * certain.  If a volume was created after a certain date, then it may have
3226  * been created with the faulty newfs_hfs.  Since newfs_hfs only created one
3227  * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3228  * that means that the entire first clump must have been written to, which means
3229  * there shouldn't be unused and unwritten nodes in that first clump, and this
3230  * repair is not needed.
3231  *
3232  * We have defined a bit in the Volume Header's attributes to indicate when the
3233  * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
3234  * As will fsck_hfs when it repairs the unused nodes.
3235  */
3236 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3237 {
3238         int result;
3239         struct filefork *catalog;
3240         int lockflags;
3241
3242         if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3243         {
3244                 /* This volume has already been checked and repaired. */
3245                 return 0;
3246         }
3247
3248         if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3249         {
3250                 /* This volume is too old to have had the problem. */
3251                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3252                 return 0;
3253         }
3254
3255         catalog = hfsmp->hfs_catalog_cp->c_datafork;
3256         if (catalog->ff_size > catalog->ff_clumpsize)
3257         {
3258                 /* The entire first clump must have been in use at some point. */
3259                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3260                 return 0;
3261         }
3262
3263         /*
3264          * If we get here, we need to zero out those unused nodes.
3265          *
3266          * We start a transaction and lock the catalog since we're going to be
3267          * making on-disk changes.  But note that BTZeroUnusedNodes doens't actually
3268          * do its writing via the journal, because that would be too much I/O
3269          * to fit in a transaction, and it's a pain to break it up into multiple
3270          * transactions.  (It behaves more like growing a B-tree would.)
3271          */
3272         printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3273         result = hfs_start_transaction(hfsmp);
3274         if (result)
3275                 goto done;
3276         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3277         result = BTZeroUnusedNodes(catalog);
3278         vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3279         hfs_systemfile_unlock(hfsmp, lockflags);
3280         hfs_end_transaction(hfsmp);
3281         if (result == 0)
3282                 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3283         printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3284
3285 done:
3286         return result;
3287 }
3288
3289
3290 extern time_t snapshot_timestamp;
3291
3292 int
3293 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3294 {
3295         int tracked_error = 0, snapshot_error = 0;
3296
3297         if (vp == NULL) {
3298                 return 0;
3299         }
3300
3301         /* Swap files are special; skip them */
3302         if (vnode_isswap(vp)) {
3303                 return 0;
3304         }
3305
3306         if (VTOC(vp)->c_bsdflags & UF_TRACKED) {
3307                 // the file has the tracked bit set, so send an event to the tracked-file handler
3308                 int error;
3309
3310                 // printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
3311                 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
3312                 if (error) {
3313                         if (error == EAGAIN) {
3314                                 printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
3315
3316                         } else if (error == EINTR) {
3317                                 // printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
3318                                 tracked_error = EINTR;
3319                         }
3320                 }
3321         }
3322
3323         if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3324                 // the change time is within this epoch
3325                 int error;
3326
3327                 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3328                 if (error == EDEADLK) {
3329                         snapshot_error = 0;
3330                 } else if (error) {
3331                         if (error == EAGAIN) {
3332                                 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3333                         } else if (error == EINTR) {
3334                                 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3335                                 snapshot_error = EINTR;
3336                         }
3337                 }
3338         }
3339
3340         if (tracked_error) return tracked_error;
3341         if (snapshot_error) return snapshot_error;
3342
3343         return 0;
3344 }
3345
3346 int
3347 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3348 {
3349         int error;
3350
3351         if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3352                 // there's nothing to do, it's not dataless
3353                 return 0;
3354         }
3355
3356         /* Swap files are special; ignore them */
3357         if (vnode_isswap(vp)) {
3358                 return 0;
3359         }
3360
3361         // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3362         error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3363         if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3364                 error = 0;
3365         } else if (error) {
3366                 if (error == EAGAIN) {
3367                         printf("hfs: dataless: timed out waiting for namespace handler...\n");
3368                         // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3369                         return 0;
3370                 } else if (error == EINTR) {
3371                         // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3372                         return EINTR;
3373                 }
3374         } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3375                 //
3376                 // if we're here, the dataless bit is still set on the file
3377                 // which means it didn't get handled.  we return an error
3378                 // but it's presently ignored by all callers of this function.
3379                 //
3380                 // XXXdbg - EDATANOTPRESENT is what we really need...
3381                 //
3382                 return EBADF;
3383         }
3384
3385         return error;
3386 }
3387
3388
3389 //
3390 // NOTE: this function takes care of starting a transaction and
3391 //       acquiring the systemfile lock so that it can call
3392 //       cat_update().
3393 //
3394 // NOTE: do NOT hold and cnode locks while calling this function
3395 //       to avoid deadlocks (because we take a lock on the root
3396 //       cnode)
3397 //
3398 int
3399 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3400 {
3401         struct vnode *rvp;
3402         struct cnode *cp;
3403         int error;
3404
3405         error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3406         if (error) {
3407                 return error;
3408         }
3409
3410         cp = VTOC(rvp);
3411         if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3412                 return error;
3413         }
3414         struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3415
3416         int lockflags;
3417         if (hfs_start_transaction(hfsmp) != 0) {
3418                 return error;
3419         }
3420         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3421
3422         if (extinfo->document_id == 0) {
3423                 // initialize this to start at 3 (one greater than the root-dir id)
3424                 extinfo->document_id = 3;
3425         }
3426
3427         *docid = extinfo->document_id++;
3428
3429         // mark the root cnode dirty
3430         cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3431         (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3432
3433         hfs_systemfile_unlock (hfsmp, lockflags);
3434         (void) hfs_end_transaction(hfsmp);
3435
3436         (void) hfs_unlock(cp);
3437
3438         vnode_put(rvp);
3439         rvp = NULL;
3440
3441         return 0;
3442 }