]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsutils.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_vfsutils.c
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_vfsutils.c 4.0
29 *
30 * (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
33 *
34 */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/stat.h>
40 #include <sys/mount.h>
41 #include <sys/mount_internal.h>
42 #include <sys/buf.h>
43 #include <sys/buf_internal.h>
44 #include <sys/ubc.h>
45 #include <sys/unistd.h>
46 #include <sys/utfconv.h>
47 #include <sys/kauth.h>
48 #include <sys/fcntl.h>
49 #include <sys/vnode_internal.h>
50 #include <kern/clock.h>
51
52 #include <libkern/OSAtomic.h>
53
54 #include "hfs.h"
55 #include "hfs_catalog.h"
56 #include "hfs_dbg.h"
57 #include "hfs_mount.h"
58 #include "hfs_endian.h"
59 #include "hfs_cnode.h"
60 #include "hfs_fsctl.h"
61
62 #include "hfscommon/headers/FileMgrInternal.h"
63 #include "hfscommon/headers/BTreesInternal.h"
64 #include "hfscommon/headers/HFSUnicodeWrappers.h"
65
66 static void ReleaseMetaFileVNode(struct vnode *vp);
67 static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);
68
69 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
70
71
72 //*******************************************************************************
73 // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
74 // hence are not in the right byte order on little endian machines. It is
75 // the responsibility of the finder and other clients to swap the data.
76 //*******************************************************************************
77
78 //*******************************************************************************
79 // Routine: hfs_MountHFSVolume
80 //
81 //
82 //*******************************************************************************
/*
 * Names given to the in-memory cnodes created for the HFS/HFS+ metadata
 * ("system") files below; used as the cat_desc name when the metadata
 * vnodes are instantiated at mount time.
 */
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";
88
89
90 __private_extern__
91 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
92 __unused struct proc *p)
93 {
94 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
95 int error;
96 ByteCount utf8chars;
97 struct cat_desc cndesc;
98 struct cat_attr cnattr;
99 struct cat_fork fork;
100
101 /* Block size must be a multiple of 512 */
102 if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
103 (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
104 return (EINVAL);
105
106 /* don't mount a writeable volume if its dirty, it must be cleaned by fsck_hfs */
107 if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
108 ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
109 return (EINVAL);
110 }
111 hfsmp->hfs_flags |= HFS_STANDARD;
112 /*
113 * The MDB seems OK: transfer info from it into VCB
114 * Note - the VCB starts out clear (all zeros)
115 *
116 */
117 vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
118 vcb->vcbCrDate = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
119 vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
120 vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
121 vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
122 vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
123 vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
124 vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
125 vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
126 vcb->allocLimit = vcb->totalBlocks;
127 vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
128 vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
129 vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
130 vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
131 vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
132 vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
133 vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
134 vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
135 vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
136 vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
137 bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
138 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
139 vcb->vcbWrCnt++; /* Compensate for write of MDB on last flush */
140
141 /* convert hfs encoded name into UTF-8 string */
142 error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
143 /*
144 * When an HFS name cannot be encoded with the current
145 * volume encoding we use MacRoman as a fallback.
146 */
147 if (error || (utf8chars == 0)) {
148 (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
149 /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny mount */
150 if (error) {
151 goto MtVolErr;
152 }
153 }
154 hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
155 vcb->vcbVBMIOSize = kHFSBlockSize;
156
157 hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
158 hfsmp->hfs_logical_block_count);
159
160 bzero(&cndesc, sizeof(cndesc));
161 cndesc.cd_parentcnid = kHFSRootParentID;
162 cndesc.cd_flags |= CD_ISMETA;
163 bzero(&cnattr, sizeof(cnattr));
164 cnattr.ca_linkcount = 1;
165 cnattr.ca_mode = S_IFREG;
166 bzero(&fork, sizeof(fork));
167
168 /*
169 * Set up Extents B-tree vnode
170 */
171 cndesc.cd_nameptr = hfs_extname;
172 cndesc.cd_namelen = strlen((char *)hfs_extname);
173 cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
174 fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
175 fork.cf_blocks = fork.cf_size / vcb->blockSize;
176 fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
177 fork.cf_vblocks = 0;
178 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
179 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
180 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
181 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
182 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
183 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
184 cnattr.ca_blocks = fork.cf_blocks;
185
186 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
187 &hfsmp->hfs_extents_vp);
188 if (error) goto MtVolErr;
189 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
190 (KeyCompareProcPtr)CompareExtentKeys));
191 if (error) {
192 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
193 goto MtVolErr;
194 }
195 hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
196
197 /*
198 * Set up Catalog B-tree vnode...
199 */
200 cndesc.cd_nameptr = hfs_catname;
201 cndesc.cd_namelen = strlen((char *)hfs_catname);
202 cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
203 fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
204 fork.cf_blocks = fork.cf_size / vcb->blockSize;
205 fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
206 fork.cf_vblocks = 0;
207 fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
208 fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
209 fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
210 fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
211 fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
212 fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
213 cnattr.ca_blocks = fork.cf_blocks;
214
215 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
216 &hfsmp->hfs_catalog_vp);
217 if (error) {
218 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
219 goto MtVolErr;
220 }
221 error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
222 (KeyCompareProcPtr)CompareCatalogKeys));
223 if (error) {
224 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
225 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
226 goto MtVolErr;
227 }
228 hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
229
230 /*
231 * Set up dummy Allocation file vnode (used only for locking bitmap)
232 */
233 cndesc.cd_nameptr = hfs_vbmname;
234 cndesc.cd_namelen = strlen((char *)hfs_vbmname);
235 cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
236 bzero(&fork, sizeof(fork));
237 cnattr.ca_blocks = 0;
238
239 error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
240 &hfsmp->hfs_allocation_vp);
241 if (error) {
242 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
243 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
244 goto MtVolErr;
245 }
246 hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
247
248 /* mark the volume dirty (clear clean unmount bit) */
249 vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
250
251 if (error == noErr)
252 {
253 error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
254 }
255
256 if ( error == noErr )
257 {
258 if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected
259 {
260 MarkVCBDirty( vcb ); // mark VCB dirty so it will be written
261 }
262 }
263
264 /*
265 * all done with system files so we can unlock now...
266 */
267 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
268 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
269 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
270
271 if (error == noErr) {
272 /* If successful, then we can just return once we've unlocked the cnodes */
273 return error;
274 }
275
276 //-- Release any resources allocated so far before exiting with an error:
277 MtVolErr:
278 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
279 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
280 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
281
282 return (error);
283 }
284
285 //*******************************************************************************
286 // Routine: hfs_MountHFSPlusVolume
287 //
288 //
289 //*******************************************************************************
290
/*
 * hfs_MountHFSPlusVolume
 *
 * Validate an HFS+/HFSX volume header, populate the in-memory VCB,
 * instantiate the metadata file vnodes (Extents, Catalog, Allocation,
 * and — when present — Attributes and Startup), then perform journal
 * handling, metadata-zone setup, hard-link directory setup, orphan
 * removal, and hot-file initialization.
 *
 * embeddedOffset is the byte offset of the HFS+ volume within the device
 * (nonzero for HFS-wrapped volumes); disksize is the device size in bytes.
 * Returns 0 on success or an errno/Mac OS error code; on failure every
 * metadata vnode acquired so far is released via ErrorExit.
 */
__private_extern__
OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
	off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
	register ExtendedVCB *vcb;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cfork;
	u_int32_t blockSize;
	daddr64_t spare_sectors;
	struct BTreeInfoRec btinfo;
	u_int16_t  signature;
	u_int16_t  hfs_version;
	int  i;
	OSErr retval;

	signature = SWAP_BE16(vhp->signature);
	hfs_version = SWAP_BE16(vhp->version);

	/* Accept only an HFS+ header (version 4) or an HFSX header (version 5). */
	if (signature == kHFSPlusSigWord) {
		if (hfs_version != kHFSPlusVersion) {
			printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version);
			return (EINVAL);
		}
	} else if (signature == kHFSXSigWord) {
		if (hfs_version != kHFSXVersion) {
			printf("hfs_mount: invalid HFSX version: %d\n", hfs_version);
			return (EINVAL);
		}
		/* The in-memory signature is always 'H+'. */
		signature = kHFSPlusSigWord;
		hfsmp->hfs_flags |= HFS_X;
	} else {
		/* Removed printf for invalid HFS+ signature because it gives
		 * false error for UFS root volume
		 */
		return (EINVAL);
	}

	/* Block size must be at least 512 and a power of 2 */
	blockSize = SWAP_BE32(vhp->blockSize);
	if (blockSize < 512 || !powerof2(blockSize))
		return (EINVAL);

	/* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0)
		return (EINVAL);

	/* Make sure we can live with the physical block size. */
	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
	    (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
	    (blockSize < hfsmp->hfs_logical_block_size)) {
		return (ENXIO);
	}

	/* If allocation block size is less than the physical
	 * block size, we assume that the physical block size
	 * is same as logical block size. The physical block
	 * size value is used to round down the offsets for
	 * reading and writing the primary and alternate volume
	 * headers at physical block boundary and will cause
	 * problems if it is less than the block size.
	 */
	if (blockSize < hfsmp->hfs_physical_block_size) {
		hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
		hfsmp->hfs_log_per_phys = 1;
	}

	/*
	 * The VolumeHeader seems OK: transfer info from it into VCB
	 * Note - the VCB starts out clear (all zeros)
	 */
	vcb = HFSTOVCB(hfsmp);

	vcb->vcbSigWord	= signature;
	vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbLsMod	= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb	= SWAP_BE32(vhp->attributes);
	vcb->vcbClpSiz	= SWAP_BE32(vhp->rsrcClumpSize);
	vcb->vcbNxtCNID	= SWAP_BE32(vhp->nextCatalogID);
	vcb->vcbVolBkUp	= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt	= SWAP_BE32(vhp->writeCount);
	vcb->vcbFilCnt	= SWAP_BE32(vhp->fileCount);
	vcb->vcbDirCnt	= SWAP_BE32(vhp->folderCount);

	/* copy 32 bytes of Finder info */
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

	vcb->vcbAlBlSt = 0;		/* hfs+ allocation blocks start at first block of volume */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
		vcb->vcbWrCnt++;	/* compensate for write of Volume Header on last flush */

	/* Now fill in the Extended VCB info */
	vcb->nextAllocation	= SWAP_BE32(vhp->nextAllocation);
	vcb->totalBlocks	= SWAP_BE32(vhp->totalBlocks);
	vcb->allocLimit		= vcb->totalBlocks;
	vcb->freeBlocks		= SWAP_BE32(vhp->freeBlocks);
	vcb->blockSize		= blockSize;
	vcb->encodingsBitmap	= SWAP_BE64(vhp->encodingsBitmap);
	vcb->localCreateDate	= SWAP_BE32(vhp->createDate);

	vcb->hfsPlusIOPosOffset	= embeddedOffset;

	/* Default to no free block reserve */
	vcb->reserveBlocks = 0;

	/*
	 * Update the logical block size in the mount struct
	 * (currently set up from the wrapper MDB) using the
	 * new blocksize value:
	 */
	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
	vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

	/*
	 * Validate and initialize the location of the alternate volume header.
	 *
	 * spare_sectors is the number of device sectors beyond the last
	 * allocation block; if the partition has grown past one allocation
	 * block of slack, the alternate header location is untrustworthy.
	 */
	spare_sectors = hfsmp->hfs_logical_block_count -
	                (((daddr64_t)vcb->totalBlocks * blockSize) /
	                   hfsmp->hfs_logical_block_size);

	if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
		hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
	} else {
		hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
					   HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
							  hfsmp->hfs_logical_block_count);
	}

	bzero(&cndesc, sizeof(cndesc));
	cndesc.cd_parentcnid = kHFSRootParentID;
	cndesc.cd_flags |= CD_ISMETA;
	bzero(&cnattr, sizeof(cnattr));
	cnattr.ca_linkcount = 1;
	cnattr.ca_mode = S_IFREG;

	/*
	 * Set up Extents B-tree vnode
	 */
	cndesc.cd_nameptr = hfs_extname;
	cndesc.cd_namelen = strlen((char *)hfs_extname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

	cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
	cfork.cf_new_size= 0;
	cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
	cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	cfork.cf_vblocks = 0;
	cnattr.ca_blocks = cfork.cf_blocks;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		cfork.cf_extents[i].startBlock =
				SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		cfork.cf_extents[i].blockCount =
				SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	                         &hfsmp->hfs_extents_vp);
	if (retval)
	{
		goto ErrorExit;
	}
	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
	hfs_unlock(hfsmp->hfs_extents_cp);

	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
	                                  (KeyCompareProcPtr) CompareExtentKeysPlus));
	if (retval)
	{
		goto ErrorExit;
	}
	/*
	 * Set up Catalog B-tree vnode
	 */
	cndesc.cd_nameptr = hfs_catname;
	cndesc.cd_namelen = strlen((char *)hfs_catname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

	cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
	cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
	cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	cfork.cf_vblocks = 0;
	cnattr.ca_blocks = cfork.cf_blocks;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		cfork.cf_extents[i].startBlock =
				SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		cfork.cf_extents[i].blockCount =
				SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	                         &hfsmp->hfs_catalog_vp);
	if (retval) {
		goto ErrorExit;
	}
	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
	hfs_unlock(hfsmp->hfs_catalog_cp);

	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
	                                  (KeyCompareProcPtr) CompareExtendedCatalogKeys));
	if (retval) {
		goto ErrorExit;
	}
	/*
	 * HFSX catalogs may use binary (case-sensitive) key comparison;
	 * detect that and re-open the b-tree with the matching comparator.
	 */
	if ((hfsmp->hfs_flags & HFS_X) &&
	    BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
		if (btinfo.keyCompareType == kHFSBinaryCompare) {
			hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
			/* Install a case-sensitive key compare */
			(void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
			                  (KeyCompareProcPtr)cat_binarykeycompare);
		}
	}

	/*
	 * Set up Allocation file vnode
	 */
	cndesc.cd_nameptr = hfs_vbmname;
	cndesc.cd_namelen = strlen((char *)hfs_vbmname);
	cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

	cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
	cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
	cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	cfork.cf_vblocks = 0;
	cnattr.ca_blocks = cfork.cf_blocks;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		cfork.cf_extents[i].startBlock =
				SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		cfork.cf_extents[i].blockCount =
				SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
	                         &hfsmp->hfs_allocation_vp);
	if (retval) {
		goto ErrorExit;
	}
	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
	hfs_unlock(hfsmp->hfs_allocation_cp);

	/*
	 * Set up Attribute B-tree vnode (optional; only when the volume has one)
	 */
	if (vhp->attributesFile.totalBlocks != 0) {
		cndesc.cd_nameptr = hfs_attrname;
		cndesc.cd_namelen = strlen((char *)hfs_attrname);
		cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

		cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
		cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
		cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		cfork.cf_vblocks = 0;
		cnattr.ca_blocks = cfork.cf_blocks;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			cfork.cf_extents[i].startBlock =
					SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			cfork.cf_extents[i].blockCount =
					SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
		                         &hfsmp->hfs_attribute_vp);
		if (retval) {
			goto ErrorExit;
		}
		hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
		hfs_unlock(hfsmp->hfs_attribute_cp);
		retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
		                                  (KeyCompareProcPtr) hfs_attrkeycompare));
		if (retval) {
			goto ErrorExit;
		}
	}

	/*
	 * Set up Startup file vnode (optional; only when the volume has one)
	 */
	if (vhp->startupFile.totalBlocks != 0) {
		cndesc.cd_nameptr = hfs_startupname;
		cndesc.cd_namelen = strlen((char *)hfs_startupname);
		cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

		cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
		cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
		cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
		cfork.cf_vblocks = 0;
		cnattr.ca_blocks = cfork.cf_blocks;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			cfork.cf_extents[i].startBlock =
					SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
			cfork.cf_extents[i].blockCount =
					SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
		}
		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
		                         &hfsmp->hfs_startup_vp);
		if (retval) {
			goto ErrorExit;
		}
		hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
		hfs_unlock(hfsmp->hfs_startup_cp);
	}

	/* Pick up volume name and create date */
	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
	if (retval) {
		goto ErrorExit;
	}
	vcb->vcbCrDate = cnattr.ca_itime;
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* mark the volume dirty (clear clean unmount bit) */
	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		hfs_flushvolumeheader(hfsmp, TRUE, 0);
	}

	/* kHFSHasFolderCount is only supported/updated on HFSX volumes */
	if ((hfsmp->hfs_flags & HFS_X) != 0) {
		hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
	}

	//
	// Check if we need to do late journal initialization.  This only
	// happens if a previous version of MacOS X (or 9) touched the disk.
	// In that case hfs_late_journal_init() will go re-locate the journal 
	// and journal_info_block files and validate that they're still kosher.
	//
	if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
		&& (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
		&& (hfsmp->jnl == NULL)) {

		retval = hfs_late_journal_init(hfsmp, vhp, args);
		if (retval != 0) {
			if (retval == EROFS) {
				// EROFS is a special error code that means the volume has an external
				// journal which we couldn't find.  in that case we do not want to
				// rewrite the volume header - we'll just refuse to mount the volume.
				retval = EINVAL;
				goto ErrorExit;
			}

			hfsmp->jnl = NULL;
			
			// if the journal failed to open, then set the lastMountedVersion
			// to be "FSK!" which fsck_hfs will see and force the fsck instead
			// of just bailing out because the volume is journaled.
			if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
				HFSPlusVolumeHeader *jvhp;
				daddr64_t mdb_offset;
				struct buf *bp = NULL;
				
				hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
				    
				mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

				bp = NULL;
				retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
						HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
						hfsmp->hfs_physical_block_size, cred, &bp);
				if (retval == 0) {
					jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
					    
					if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
						printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
						jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
						buf_bwrite(bp);
					} else {
						buf_brelse(bp);
					}
					bp = NULL;
				} else if (bp) {
					buf_brelse(bp);
					// clear this so the error exit path won't try to use it
					bp = NULL;
				}
			}

			retval = EINVAL;
			goto ErrorExit;
		} else if (hfsmp->jnl) {
			vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
		}
	} else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
		struct cat_attr jinfo_attr, jnl_attr;

		/*
		 * Temporarily drop the journaled bit on read-only mounts so
		 * GetFileInfo() can see the journal files; restored below.
		 */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		}

		// if we're here we need to fill in the fileid's for the
		// journal and journal_info_block.
		hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
		hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
		if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
			printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
			printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
		}

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			vcb->vcbAtrb |= kHFSVolumeJournaledMask;
		}

		if (hfsmp->jnl == NULL) {
			vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
		}
	}

	/*
	 * Establish a metadata allocation zone.
	 */
	hfs_metadatazone_init(hfsmp);

	/*
	 * Make any metadata zone adjustments.
	 */
	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
		/* Keep the roving allocator out of the metadata zone. */
		if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
		    vcb->nextAllocation <= hfsmp->hfs_metazone_end) {	    
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
		}
	} else {
		if (vcb->nextAllocation <= 1) {
			vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
		}
	}
	vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

	/* Setup private/hidden directories for hardlinks. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) 
		hfs_remove_orphans(hfsmp);

	/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) 
	{
		retval = hfs_erase_unused_nodes(hfsmp);
		if (retval)
			goto ErrorExit;
	}
	
	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
	{
		MarkVCBDirty( vcb );	// mark VCB dirty so it will be written
	}

	/*
	 * Allow hot file clustering if conditions allow.
	 */
	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
	    ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
	    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
		(void) hfs_recording_init(hfsmp);
	}

	/* Force ACLs on HFS+ file systems. */
	vfs_setextendedsecurity(HFSTOVFS(hfsmp));

	/* Check if volume supports writing of extent-based extended attributes */
	hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);

	return (0);

ErrorExit:
	/*
	 * A fatal error occurred and the volume cannot be mounted
	 * release any resources that we aquired...
	 */
	if (hfsmp->hfs_attribute_vp)
		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
	ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);

	return (retval);
}
768
769
770 /*
771 * ReleaseMetaFileVNode
772 *
773 * vp L - -
774 */
775 static void ReleaseMetaFileVNode(struct vnode *vp)
776 {
777 struct filefork *fp;
778
779 if (vp && (fp = VTOF(vp))) {
780 if (fp->fcbBTCBPtr != NULL) {
781 (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
782 (void) BTClosePath(fp);
783 hfs_unlock(VTOC(vp));
784 }
785
786 /* release the node even if BTClosePath fails */
787 vnode_recycle(vp);
788 vnode_put(vp);
789 }
790 }
791
792
793 /*************************************************************
794 *
795 * Unmounts a hfs volume.
796 * At this point vflush() has been called (to dump all non-metadata files)
797 *
798 *************************************************************/
799
800 __private_extern__
801 int
802 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
803 {
804 /* Get rid of our attribute data vnode (if any). */
805 if (hfsmp->hfs_attrdata_vp) {
806 vnode_t advp = hfsmp->hfs_attrdata_vp;
807
808 if (vnode_get(advp) == 0) {
809 vnode_rele_ext(advp, O_EVTONLY, 0);
810 vnode_put(advp);
811 }
812 hfsmp->hfs_attrdata_vp = NULLVP;
813 }
814
815 if (hfsmp->hfs_startup_vp)
816 ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
817
818 if (hfsmp->hfs_allocation_vp)
819 ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
820
821 if (hfsmp->hfs_attribute_vp)
822 ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
823
824 ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
825 ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
826
827 /*
828 * Setting these pointers to NULL so that any references
829 * past this point will fail, and tell us the point of failure.
830 * Also, facilitates a check in hfs_update for a null catalog
831 * vp
832 */
833 hfsmp->hfs_allocation_vp = NULL;
834 hfsmp->hfs_attribute_vp = NULL;
835 hfsmp->hfs_catalog_vp = NULL;
836 hfsmp->hfs_extents_vp = NULL;
837 hfsmp->hfs_startup_vp = NULL;
838
839 return (0);
840 }
841
842
843 /*
844 * Test if fork has overflow extents.
845 */
846 __private_extern__
847 int
848 overflow_extents(struct filefork *fp)
849 {
850 u_int32_t blocks;
851
852 //
853 // If the vnode pointer is NULL then we're being called
854 // from hfs_remove_orphans() with a faked-up filefork
855 // and therefore it has to be an HFS+ volume. Otherwise
856 // we check through the volume header to see what type
857 // of volume we're on.
858 //
859 if (FTOV(fp) == NULL || VTOVCB(FTOV(fp))->vcbSigWord == kHFSPlusSigWord) {
860 if (fp->ff_extents[7].blockCount == 0)
861 return (0);
862
863 blocks = fp->ff_extents[0].blockCount +
864 fp->ff_extents[1].blockCount +
865 fp->ff_extents[2].blockCount +
866 fp->ff_extents[3].blockCount +
867 fp->ff_extents[4].blockCount +
868 fp->ff_extents[5].blockCount +
869 fp->ff_extents[6].blockCount +
870 fp->ff_extents[7].blockCount;
871 } else {
872 if (fp->ff_extents[2].blockCount == 0)
873 return false;
874
875 blocks = fp->ff_extents[0].blockCount +
876 fp->ff_extents[1].blockCount +
877 fp->ff_extents[2].blockCount;
878 }
879
880 return (fp->ff_blocks > blocks);
881 }
882
883
884 /*
885 * Lock HFS system file(s).
886 */
887 __private_extern__
888 int
889 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
890 {
891 /*
892 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
893 */
894 if (flags & SFL_CATALOG) {
895
896 #ifdef HFS_CHECK_LOCK_ORDER
897 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
898 panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
899 }
900 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
901 panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
902 }
903 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
904 panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
905 }
906 #endif /* HFS_CHECK_LOCK_ORDER */
907
908 (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
909 /*
910 * When the catalog file has overflow extents then
911 * also acquire the extents b-tree lock if its not
912 * already requested.
913 */
914 if ((flags & SFL_EXTENTS) == 0 &&
915 overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) {
916 flags |= SFL_EXTENTS;
917 }
918 }
919 if (flags & SFL_ATTRIBUTE) {
920
921 #ifdef HFS_CHECK_LOCK_ORDER
922 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
923 panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
924 }
925 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
926 panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
927 }
928 #endif /* HFS_CHECK_LOCK_ORDER */
929
930 if (hfsmp->hfs_attribute_cp) {
931 (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype);
932 /*
933 * When the attribute file has overflow extents then
934 * also acquire the extents b-tree lock if its not
935 * already requested.
936 */
937 if ((flags & SFL_EXTENTS) == 0 &&
938 overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) {
939 flags |= SFL_EXTENTS;
940 }
941 } else {
942 flags &= ~SFL_ATTRIBUTE;
943 }
944 }
945 if (flags & SFL_STARTUP) {
946 #ifdef HFS_CHECK_LOCK_ORDER
947 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
948 panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
949 }
950 #endif /* HFS_CHECK_LOCK_ORDER */
951
952 (void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
953 /*
954 * When the startup file has overflow extents then
955 * also acquire the extents b-tree lock if its not
956 * already requested.
957 */
958 if ((flags & SFL_EXTENTS) == 0 &&
959 overflow_extents(VTOF(hfsmp->hfs_startup_vp))) {
960 flags |= SFL_EXTENTS;
961 }
962 }
963 /*
964 * To prevent locks being taken in the wrong order, the extent lock
965 * gets a bitmap lock as well.
966 */
967 if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
968 /*
969 * Since the only bitmap operations are clearing and
970 * setting bits we always need exclusive access. And
971 * when we have a journal, we can "hide" behind that
972 * lock since we can only change the bitmap from
973 * within a transaction.
974 */
975 if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) {
976 flags &= ~SFL_BITMAP;
977 } else {
978 (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
979 /* The bitmap lock is also grabbed when only extent lock
980 * was requested. Set the bitmap lock bit in the lock
981 * flags which callers will use during unlock.
982 */
983 flags |= SFL_BITMAP;
984 }
985 }
986 if (flags & SFL_EXTENTS) {
987 /*
988 * Since the extents btree lock is recursive we always
989 * need exclusive access.
990 */
991 (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
992 }
993 return (flags);
994 }
995
996 /*
997 * unlock HFS system file(s).
998 */
999 __private_extern__
1000 void
1001 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1002 {
1003 struct timeval tv;
1004 u_int32_t lastfsync;
1005 int numOfLockedBuffs;
1006
1007 if (hfsmp->jnl == NULL) {
1008 microuptime(&tv);
1009 lastfsync = tv.tv_sec;
1010 }
1011 if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1012 hfs_unlock(hfsmp->hfs_startup_cp);
1013 }
1014 if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1015 if (hfsmp->jnl == NULL) {
1016 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1017 numOfLockedBuffs = count_lock_queue();
1018 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1019 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1020 kMaxSecsForFsync))) {
1021 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1022 }
1023 }
1024 hfs_unlock(hfsmp->hfs_attribute_cp);
1025 }
1026 if (flags & SFL_CATALOG) {
1027 if (hfsmp->jnl == NULL) {
1028 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1029 numOfLockedBuffs = count_lock_queue();
1030 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1031 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1032 kMaxSecsForFsync))) {
1033 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1034 }
1035 }
1036 hfs_unlock(hfsmp->hfs_catalog_cp);
1037 }
1038 if (flags & SFL_BITMAP) {
1039 hfs_unlock(hfsmp->hfs_allocation_cp);
1040 }
1041 if (flags & SFL_EXTENTS) {
1042 if (hfsmp->jnl == NULL) {
1043 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1044 numOfLockedBuffs = count_lock_queue();
1045 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1046 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1047 kMaxSecsForFsync))) {
1048 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1049 }
1050 }
1051 hfs_unlock(hfsmp->hfs_extents_cp);
1052 }
1053 }
1054
1055
1056 /*
1057 * RequireFileLock
1058 *
1059 * Check to see if a vnode is locked in the current context
1060 * This is to be used for debugging purposes only!!
1061 */
1062 #if HFS_DIAGNOSTIC
1063 void RequireFileLock(FileReference vp, int shareable)
1064 {
1065 int locked;
1066
1067 /* The extents btree and allocation bitmap are always exclusive. */
1068 if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1069 VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1070 shareable = 0;
1071 }
1072
1073 locked = VTOC(vp)->c_lockowner == (void *)current_thread();
1074
1075 if (!locked && !shareable) {
1076 switch (VTOC(vp)->c_fileid) {
1077 case kHFSExtentsFileID:
1078 panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1079 break;
1080 case kHFSCatalogFileID:
1081 panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1082 break;
1083 case kHFSAllocationFileID:
1084 /* The allocation file can hide behind the jornal lock. */
1085 if (VTOHFS(vp)->jnl == NULL)
1086 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1087 break;
1088 case kHFSStartupFileID:
1089 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1090 case kHFSAttributesFileID:
1091 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1092 break;
1093 }
1094 }
1095 }
1096 #endif
1097
1098
1099 /*
1100 * There are three ways to qualify for ownership rights on an object:
1101 *
1102 * 1. (a) Your UID matches the cnode's UID.
1103 * (b) The object in question is owned by "unknown"
1104 * 2. (a) Permissions on the filesystem are being ignored and
1105 * your UID matches the replacement UID.
1106 * (b) Permissions on the filesystem are being ignored and
1107 * the replacement UID is "unknown".
1108 * 3. You are root.
1109 *
1110 */
1111 int
1112 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1113 __unused struct proc *p, int invokesuperuserstatus)
1114 {
1115 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1116 (cnode_uid == UNKNOWNUID) || /* [1b] */
1117 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1118 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1119 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1120 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1121 return (0);
1122 } else {
1123 return (EPERM);
1124 }
1125 }
1126
1127
1128 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1129 u_int32_t blockSizeLimit,
1130 u_int32_t baseMultiple) {
1131 /*
1132 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1133 specified limit but still an even multiple of the baseMultiple.
1134 */
1135 int baseBlockCount, blockCount;
1136 u_int32_t trialBlockSize;
1137
1138 if (allocationBlockSize % baseMultiple != 0) {
1139 /*
1140 Whoops: the allocation blocks aren't even multiples of the specified base:
1141 no amount of dividing them into even parts will be a multiple, either then!
1142 */
1143 return 512; /* Hope for the best */
1144 };
1145
1146 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1147 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1148 Even though the former (the result of the loop below) is the larger allocation
1149 block size, the latter is more efficient: */
1150 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1151
1152 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1153 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1154
1155 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1156 trialBlockSize = blockCount * baseMultiple;
1157 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1158 if ((trialBlockSize <= blockSizeLimit) &&
1159 (trialBlockSize % baseMultiple == 0)) {
1160 return trialBlockSize;
1161 };
1162 };
1163 };
1164
1165 /* Note: we should never get here, since blockCount = 1 should always work,
1166 but this is nice and safe and makes the compiler happy, too ... */
1167 return 512;
1168 }
1169
1170
1171 __private_extern__
1172 u_int32_t
1173 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1174 struct cat_attr *fattr, struct cat_fork *forkinfo)
1175 {
1176 struct hfsmount * hfsmp;
1177 struct cat_desc jdesc;
1178 int lockflags;
1179 int error;
1180
1181 if (vcb->vcbSigWord != kHFSPlusSigWord)
1182 return (0);
1183
1184 hfsmp = VCBTOHFS(vcb);
1185
1186 memset(&jdesc, 0, sizeof(struct cat_desc));
1187 jdesc.cd_parentcnid = kRootDirID;
1188 jdesc.cd_nameptr = (const u_int8_t *)name;
1189 jdesc.cd_namelen = strlen(name);
1190
1191 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1192 error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL);
1193 hfs_systemfile_unlock(hfsmp, lockflags);
1194
1195 if (error == 0) {
1196 return (fattr->ca_fileid);
1197 } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1198 return (0);
1199 }
1200
1201 return (0); /* XXX what callers expect on an error */
1202 }
1203
1204
1205 /*
1206 * On HFS Plus Volumes, there can be orphaned files or directories
1207 * These are files or directories that were unlinked while busy.
1208 * If the volume was not cleanly unmounted then some of these may
1209 * have persisted and need to be removed.
1210 */
1211 __private_extern__
1212 void
1213 hfs_remove_orphans(struct hfsmount * hfsmp)
1214 {
1215 struct BTreeIterator * iterator = NULL;
1216 struct FSBufferDescriptor btdata;
1217 struct HFSPlusCatalogFile filerec;
1218 struct HFSPlusCatalogKey * keyp;
1219 struct proc *p = current_proc();
1220 FCB *fcb;
1221 ExtendedVCB *vcb;
1222 char filename[32];
1223 char tempname[32];
1224 size_t namelen;
1225 cat_cookie_t cookie;
1226 int catlock = 0;
1227 int catreserve = 0;
1228 int started_tr = 0;
1229 int lockflags;
1230 int result;
1231 int orphaned_files = 0;
1232 int orphaned_dirs = 0;
1233
1234 bzero(&cookie, sizeof(cookie));
1235
1236 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1237 return;
1238
1239 vcb = HFSTOVCB(hfsmp);
1240 fcb = VTOF(hfsmp->hfs_catalog_vp);
1241
1242 btdata.bufferAddress = &filerec;
1243 btdata.itemSize = sizeof(filerec);
1244 btdata.itemCount = 1;
1245
1246 MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1247 bzero(iterator, sizeof(*iterator));
1248
1249 /* Build a key to "temp" */
1250 keyp = (HFSPlusCatalogKey*)&iterator->key;
1251 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1252 keyp->nodeName.length = 4; /* "temp" */
1253 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1254 keyp->nodeName.unicode[0] = 't';
1255 keyp->nodeName.unicode[1] = 'e';
1256 keyp->nodeName.unicode[2] = 'm';
1257 keyp->nodeName.unicode[3] = 'p';
1258
1259 /*
1260 * Position the iterator just before the first real temp file/dir.
1261 */
1262 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1263 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1264 hfs_systemfile_unlock(hfsmp, lockflags);
1265
1266 /* Visit all the temp files/dirs in the HFS+ private directory. */
1267 for (;;) {
1268 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1269 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1270 hfs_systemfile_unlock(hfsmp, lockflags);
1271 if (result)
1272 break;
1273 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1274 break;
1275
1276 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1277 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1278
1279 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1280 HFS_DELETE_PREFIX, filerec.fileID);
1281
1282 /*
1283 * Delete all files (and directories) named "tempxxx",
1284 * where xxx is the file's cnid in decimal.
1285 *
1286 */
1287 if (bcmp(tempname, filename, namelen) == 0) {
1288 struct filefork dfork;
1289 struct filefork rfork;
1290 struct cnode cnode;
1291
1292 bzero(&dfork, sizeof(dfork));
1293 bzero(&rfork, sizeof(rfork));
1294 bzero(&cnode, sizeof(cnode));
1295
1296 /* Delete any attributes, ignore errors */
1297 (void) hfs_removeallattr(hfsmp, filerec.fileID);
1298
1299 if (hfs_start_transaction(hfsmp) != 0) {
1300 printf("hfs_remove_orphans: failed to start transaction\n");
1301 goto exit;
1302 }
1303 started_tr = 1;
1304
1305 /*
1306 * Reserve some space in the Catalog file.
1307 */
1308 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1309 printf("hfs_remove_orphans: cat_preflight failed\n");
1310 goto exit;
1311 }
1312 catreserve = 1;
1313
1314 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1315 catlock = 1;
1316
1317 /* Build a fake cnode */
1318 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1319 &dfork.ff_data, &rfork.ff_data);
1320 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1321 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1322 cnode.c_desc.cd_namelen = namelen;
1323 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1324 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1325
1326 /* Position iterator at previous entry */
1327 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1328 NULL, NULL) != 0) {
1329 break;
1330 }
1331
1332 /* Truncate the file to zero (both forks) */
1333 if (dfork.ff_blocks > 0) {
1334 u_int64_t fsize;
1335
1336 dfork.ff_cp = &cnode;
1337 cnode.c_datafork = &dfork;
1338 cnode.c_rsrcfork = NULL;
1339 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1340 while (fsize > 0) {
1341 if (fsize > HFS_BIGFILE_SIZE && overflow_extents(&dfork)) {
1342 fsize -= HFS_BIGFILE_SIZE;
1343 } else {
1344 fsize = 0;
1345 }
1346
1347 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
1348 printf("hfs: error truncting data fork!\n");
1349 break;
1350 }
1351
1352 //
1353 // if we're iteratively truncating this file down,
1354 // then end the transaction and start a new one so
1355 // that no one transaction gets too big.
1356 //
1357 if (fsize > 0 && started_tr) {
1358 /* Drop system file locks before starting
1359 * another transaction to preserve lock order.
1360 */
1361 hfs_systemfile_unlock(hfsmp, lockflags);
1362 catlock = 0;
1363 hfs_end_transaction(hfsmp);
1364
1365 if (hfs_start_transaction(hfsmp) != 0) {
1366 started_tr = 0;
1367 break;
1368 }
1369 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1370 catlock = 1;
1371 }
1372 }
1373 }
1374
1375 if (rfork.ff_blocks > 0) {
1376 rfork.ff_cp = &cnode;
1377 cnode.c_datafork = NULL;
1378 cnode.c_rsrcfork = &rfork;
1379 if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
1380 printf("hfs: error truncting rsrc fork!\n");
1381 break;
1382 }
1383 }
1384
1385 /* Remove the file or folder record from the Catalog */
1386 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1387 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1388 hfs_systemfile_unlock(hfsmp, lockflags);
1389 catlock = 0;
1390 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1391 break;
1392 }
1393
1394 if (cnode.c_attr.ca_mode & S_IFDIR) {
1395 orphaned_dirs++;
1396 }
1397 else {
1398 orphaned_files++;
1399 }
1400
1401 /* Update parent and volume counts */
1402 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1403 if (cnode.c_attr.ca_mode & S_IFDIR) {
1404 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1405 }
1406
1407 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1408 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1409
1410 /* Drop locks and end the transaction */
1411 hfs_systemfile_unlock(hfsmp, lockflags);
1412 cat_postflight(hfsmp, &cookie, p);
1413 catlock = catreserve = 0;
1414
1415 /*
1416 Now that Catalog is unlocked, update the volume info, making
1417 sure to differentiate between files and directories
1418 */
1419 if (cnode.c_attr.ca_mode & S_IFDIR) {
1420 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1421 }
1422 else{
1423 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1424 }
1425
1426 if (started_tr) {
1427 hfs_end_transaction(hfsmp);
1428 started_tr = 0;
1429 }
1430
1431 } /* end if */
1432 } /* end for */
1433 if (orphaned_files > 0 || orphaned_dirs > 0)
1434 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1435 exit:
1436 if (catlock) {
1437 hfs_systemfile_unlock(hfsmp, lockflags);
1438 }
1439 if (catreserve) {
1440 cat_postflight(hfsmp, &cookie, p);
1441 }
1442 if (started_tr) {
1443 hfs_end_transaction(hfsmp);
1444 }
1445
1446 FREE(iterator, M_TEMP);
1447 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1448 }
1449
1450
1451 /*
1452 * This will return the correct logical block size for a given vnode.
1453 * For most files, it is the allocation block size, for meta data like
1454 * BTrees, this is kept as part of the BTree private nodeSize
1455 */
1456 u_int32_t
1457 GetLogicalBlockSize(struct vnode *vp)
1458 {
1459 u_int32_t logBlockSize;
1460
1461 DBG_ASSERT(vp != NULL);
1462
1463 /* start with default */
1464 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1465
1466 if (vnode_issystem(vp)) {
1467 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1468 BTreeInfoRec bTreeInfo;
1469
1470 /*
1471 * We do not lock the BTrees, because if we are getting block..then the tree
1472 * should be locked in the first place.
1473 * We just want the nodeSize wich will NEVER change..so even if the world
1474 * is changing..the nodeSize should remain the same. Which argues why lock
1475 * it in the first place??
1476 */
1477
1478 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1479
1480 logBlockSize = bTreeInfo.nodeSize;
1481
1482 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1483 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1484 }
1485 }
1486
1487 DBG_ASSERT(logBlockSize > 0);
1488
1489 return logBlockSize;
1490 }
1491
/*
 * Return the number of free allocation blocks on the volume, less any
 * reserved blocks (when 'wantreserve' is set) and blocks loaned out for
 * delayed allocation.  On sparse-device mounts the result is further
 * capped by the space actually available on the backing store.
 */
__private_extern__
u_int32_t
hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
{
	u_int32_t freeblks;
	u_int32_t rsrvblks;
	u_int32_t loanblks;

	/*
	 * We don't bother taking the mount lock
	 * to look at these values since the values
	 * themselves are each updated atomically
	 * on aligned addresses.
	 */
	freeblks = hfsmp->freeBlocks;
	rsrvblks = hfsmp->reserveBlocks;
	loanblks = hfsmp->loanedBlocks;
	if (wantreserve) {
		/* Guard against underflow: clamp at zero. */
		if (freeblks > rsrvblks)
			freeblks -= rsrvblks;
		else
			freeblks = 0;
	}
	if (freeblks > loanblks)
		freeblks -= loanblks;
	else
		freeblks = 0;

#ifdef HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse, check the
	 * available space on the backing store volume.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vfsstatfs *vfsp;  /* 272 bytes */
		u_int64_t vfreeblks;
		u_int32_t loanedblks;
		struct mount * backingfs_mp;
		struct timeval now;

		backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp);

		/* Refresh the backing-store statfs at most once per second. */
		microtime(&now);
		if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
			vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
			hfsmp->hfs_last_backingstatfs = now.tv_sec;
		}

		if ((vfsp = vfs_statfs(backingfs_mp))) {
			/* Mount lock held while combining the backing-store numbers. */
			HFS_MOUNT_LOCK(hfsmp, TRUE);
			vfreeblks = vfsp->f_bavail;
			/* Normalize block count if needed. */
			if (vfsp->f_bsize != hfsmp->blockSize) {
				vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / hfsmp->blockSize;
			}
			if (vfreeblks > (unsigned int)hfsmp->hfs_sparsebandblks)
				vfreeblks -= hfsmp->hfs_sparsebandblks;
			else
				vfreeblks = 0;

			/* Take into account any delayed allocations. */
			loanedblks = 2 * hfsmp->loanedBlocks;
			if (vfreeblks > loanedblks)
				vfreeblks -= loanedblks;
			else
				vfreeblks = 0;

			if (hfsmp->hfs_backingfs_maxblocks) {
				vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks);
			}
			/* The volume can never report more than the backing store has. */
			freeblks = MIN(vfreeblks, freeblks);
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}
	}
#endif /* HFS_SPARSE_DEV */

	return (freeblks);
}
1570
1571 /*
1572 * Map HFS Common errors (negative) to BSD error codes (positive).
1573 * Positive errors (ie BSD errors) are passed through unchanged.
1574 */
1575 short MacToVFSError(OSErr err)
1576 {
1577 if (err >= 0)
1578 return err;
1579
1580 switch (err) {
1581 case dskFulErr: /* -34 */
1582 case btNoSpaceAvail: /* -32733 */
1583 return ENOSPC;
1584 case fxOvFlErr: /* -32750 */
1585 return EOVERFLOW;
1586
1587 case btBadNode: /* -32731 */
1588 return EIO;
1589
1590 case memFullErr: /* -108 */
1591 return ENOMEM; /* +12 */
1592
1593 case cmExists: /* -32718 */
1594 case btExists: /* -32734 */
1595 return EEXIST; /* +17 */
1596
1597 case cmNotFound: /* -32719 */
1598 case btNotFound: /* -32735 */
1599 return ENOENT; /* 28 */
1600
1601 case cmNotEmpty: /* -32717 */
1602 return ENOTEMPTY; /* 66 */
1603
1604 case cmFThdDirErr: /* -32714 */
1605 return EISDIR; /* 21 */
1606
1607 case fxRangeErr: /* -32751 */
1608 return ERANGE;
1609
1610 case bdNamErr: /* -37 */
1611 return ENAMETOOLONG; /* 63 */
1612
1613 case paramErr: /* -50 */
1614 case fileBoundsErr: /* -1309 */
1615 return EINVAL; /* +22 */
1616
1617 case fsBTBadNodeSize:
1618 return ENXIO;
1619
1620 default:
1621 return EIO; /* +5 */
1622 }
1623 }
1624
1625
1626 /*
1627 * Find the current thread's directory hint for a given index.
1628 *
1629 * Requires an exclusive lock on directory cnode.
1630 *
1631 * Use detach if the cnode lock must be dropped while the hint is still active.
1632 */
1633 __private_extern__
1634 directoryhint_t *
1635 hfs_getdirhint(struct cnode *dcp, int index, int detach)
1636 {
1637 struct timeval tv;
1638 directoryhint_t *hint;
1639 boolean_t need_remove, need_init;
1640 const u_int8_t * name;
1641
1642 microuptime(&tv);
1643
1644 /*
1645 * Look for an existing hint first. If not found, create a new one (when
1646 * the list is not full) or recycle the oldest hint. Since new hints are
1647 * always added to the head of the list, the last hint is always the
1648 * oldest.
1649 */
1650 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1651 if (hint->dh_index == index)
1652 break;
1653 }
1654 if (hint != NULL) { /* found an existing hint */
1655 need_init = false;
1656 need_remove = true;
1657 } else { /* cannot find an existing hint */
1658 need_init = true;
1659 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
1660 /* Create a default directory hint */
1661 MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
1662 ++dcp->c_dirhintcnt;
1663 need_remove = false;
1664 } else { /* recycle the last (i.e., the oldest) hint */
1665 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
1666 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
1667 (name = hint->dh_desc.cd_nameptr)) {
1668 hint->dh_desc.cd_nameptr = NULL;
1669 hint->dh_desc.cd_namelen = 0;
1670 hint->dh_desc.cd_flags &= ~CD_HASBUF;
1671 vfs_removename((const char *)name);
1672 }
1673 need_remove = true;
1674 }
1675 }
1676
1677 if (need_remove)
1678 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1679
1680 if (detach)
1681 --dcp->c_dirhintcnt;
1682 else
1683 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1684
1685 if (need_init) {
1686 hint->dh_index = index;
1687 hint->dh_desc.cd_flags = 0;
1688 hint->dh_desc.cd_encoding = 0;
1689 hint->dh_desc.cd_namelen = 0;
1690 hint->dh_desc.cd_nameptr = NULL;
1691 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
1692 hint->dh_desc.cd_hint = dcp->c_childhint;
1693 hint->dh_desc.cd_cnid = 0;
1694 }
1695 hint->dh_time = tv.tv_sec;
1696 return (hint);
1697 }
1698
1699 /*
1700 * Release a single directory hint.
1701 *
1702 * Requires an exclusive lock on directory cnode.
1703 */
1704 __private_extern__
1705 void
1706 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
1707 {
1708 const u_int8_t * name;
1709 directoryhint_t *hint;
1710
1711 /* Check if item is on list (could be detached) */
1712 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
1713 if (hint == relhint) {
1714 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
1715 --dcp->c_dirhintcnt;
1716 break;
1717 }
1718 }
1719 name = relhint->dh_desc.cd_nameptr;
1720 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1721 relhint->dh_desc.cd_nameptr = NULL;
1722 relhint->dh_desc.cd_namelen = 0;
1723 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
1724 vfs_removename((const char *)name);
1725 }
1726 FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
1727 }
1728
1729 /*
1730 * Release directory hints for given directory
1731 *
1732 * Requires an exclusive lock on directory cnode.
1733 */
1734 __private_extern__
1735 void
1736 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
1737 {
1738 struct timeval tv;
1739 directoryhint_t *hint, *prev;
1740 const u_int8_t * name;
1741
1742 if (stale_hints_only)
1743 microuptime(&tv);
1744
1745 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
1746 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
1747 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
1748 break; /* stop here if this entry is too new */
1749 name = hint->dh_desc.cd_nameptr;
1750 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
1751 hint->dh_desc.cd_nameptr = NULL;
1752 hint->dh_desc.cd_namelen = 0;
1753 hint->dh_desc.cd_flags &= ~CD_HASBUF;
1754 vfs_removename((const char *)name);
1755 }
1756 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
1757 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
1758 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
1759 --dcp->c_dirhintcnt;
1760 }
1761 }
1762
1763 /*
1764 * Insert a detached directory hint back into the list of dirhints.
1765 *
1766 * Requires an exclusive lock on directory cnode.
1767 */
1768 __private_extern__
1769 void
1770 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
1771 {
1772 directoryhint_t *test;
1773
1774 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
1775 if (test == hint)
1776 panic("hfs_insertdirhint: hint %p already on list!", hint);
1777 }
1778
1779 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
1780 ++dcp->c_dirhintcnt;
1781 }
1782
1783 /*
1784 * Perform a case-insensitive compare of two UTF-8 filenames.
1785 *
1786 * Returns 0 if the strings match.
1787 */
1788 __private_extern__
1789 int
1790 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
1791 {
1792 u_int16_t *ustr1, *ustr2;
1793 size_t ulen1, ulen2;
1794 size_t maxbytes;
1795 int cmp = -1;
1796
1797 if (len1 != len2)
1798 return (cmp);
1799
1800 maxbytes = kHFSPlusMaxFileNameChars << 1;
1801 MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
1802 ustr2 = ustr1 + (maxbytes >> 1);
1803
1804 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
1805 goto out;
1806 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
1807 goto out;
1808
1809 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
1810 out:
1811 FREE(ustr1, M_TEMP);
1812 return (cmp);
1813 }
1814
1815
/* Context passed through IOBSDIterateMediaWithContent() to
 * journal_open_cb() while hunting for an external journal device. */
typedef struct jopen_cb_info {
    off_t   jsize;		/* journal size, passed to journal_is_clean() */
    char   *desired_uuid;	/* uuid to match; empty string matches any device */
    struct vnode *jvp;		/* out: vnode of the opened journal device */
    size_t  blksize;		/* device block size for journal_is_clean() */
    int     need_clean;		/* in: reject dirty journals when matching by scan */
    int     need_init;		/* out: set when the journal must be initialized */
} jopen_cb_info;
1824
/*
 * Media-iteration callback: try to claim 'bsd_dev_name' as the external
 * journal device.  Returns 0 to stop iterating (device claimed, left
 * open with ji->jvp set and marked mounted-on) or 1 to keep looking.
 */
static int
journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
{
	struct nameidata nd;
	jopen_cb_info *ji = (jopen_cb_info *)arg;
	char bsd_name[256];
	int error;

	/* Build "/dev/<bsd_dev_name>" for the lookup below. */
	strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
	strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);

	/* A specific uuid was requested and this isn't it: skip. */
	if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
		return 1;    // keep iterating
	}

	// if we're here, either the desired uuid matched or there was no
	// desired uuid so let's try to open the device for writing and
	// see if it works.  if it does, we'll use it.

	NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
	if ((error = namei(&nd))) {
		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
		return 1;    // keep iterating
	}

	ji->jvp = nd.ni_vp;
	nameidone(&nd);

	if (ji->jvp == NULL) {
		printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
	} else {
		error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
		if (error == 0) {
			// if the journal is dirty and we didn't specify a desired
			// journal device uuid, then do not use the journal.  but
			// if the journal is just invalid (e.g. it hasn't been
			// initialized) then just set the need_init flag.
			if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
				error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
				if (error == EBUSY) {
					/* Dirty journal: release the device and move on. */
					VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
					vnode_put(ji->jvp);
					ji->jvp = NULL;
					return 1;    // keep iterating
				} else if (error == EINVAL) {
					ji->need_init = 1;
				}
			}

			/* Record which uuid we settled on for the caller. */
			if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
				strlcpy(ji->desired_uuid, uuid_str, 128);
			}
			vnode_setmountedon(ji->jvp);
			// printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str);
			return 0;   // stop iterating
		} else {
			/* Open failed: drop the iocount from namei(). */
			vnode_put(ji->jvp);
			ji->jvp = NULL;
		}
	}

	return 1;   // keep iterating
}
1888
1889 extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
1890 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
1891 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
1892 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
1893
1894
1895 static vnode_t
1896 open_journal_dev(const char *vol_device,
1897 int need_clean,
1898 char *uuid_str,
1899 char *machine_serial_num,
1900 off_t jsize,
1901 size_t blksize,
1902 int *need_init)
1903 {
1904 int retry_counter=0;
1905 jopen_cb_info ji;
1906
1907 ji.jsize = jsize;
1908 ji.desired_uuid = uuid_str;
1909 ji.jvp = NULL;
1910 ji.blksize = blksize;
1911 ji.need_clean = need_clean;
1912 ji.need_init = 0;
1913
1914 // if (uuid_str[0] == '\0') {
1915 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
1916 // } else {
1917 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
1918 // }
1919 while (ji.jvp == NULL && retry_counter++ < 4) {
1920 if (retry_counter > 1) {
1921 if (uuid_str[0]) {
1922 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
1923 } else {
1924 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
1925 }
1926 delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
1927 }
1928
1929 IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
1930 }
1931
1932 if (ji.jvp == NULL) {
1933 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
1934 vol_device, uuid_str, machine_serial_num);
1935 }
1936
1937 *need_init = ji.need_init;
1938
1939 return ji.jvp;
1940 }
1941
1942
/*
 * hfs_early_journal_init
 *
 * Open (or create) the volume's journal during early mount, before the
 * catalog B-tree is usable.  The Journal Info Block is read directly from
 * the location recorded in the volume header.  Handles both in-filesystem
 * journals and external journal partitions, replays/initializes the journal
 * as needed, and re-reads the MDB/volume header afterwards (replay may have
 * changed it).
 *
 * Parameters:
 *   hfsmp          - mount being initialized (jvp/jnl/jnl_start/jnl_size set here)
 *   vhp            - big-endian volume header as read from disk
 *   _args          - optional struct hfs_mount_args (journal flags/tbuffer size)
 *   embeddedOffset - byte offset of the embedded HFS+ volume within the device
 *   mdb_offset     - block address of the MDB to re-read after replay (0 = compute)
 *   mdbp           - buffer to refill with the (possibly replayed) volume header
 *   cred           - credential for the buffer-cache reads
 *
 * Returns 0 on success, or an errno (EROFS if a required external journal
 * device is missing or, for read-only mounts, the journal is dirty; EINVAL
 * if the journal could not be opened/created).
 */
__private_extern__
int
hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
		       void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
		       HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
{
	JournalInfoBlock *jibp;
	struct buf *jinfo_bp, *bp;
	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	int retval, write_jibp = 0;
	uint32_t blksize = hfsmp->hfs_logical_block_size;
	struct vnode *devvp;
	struct hfs_mount_args *args = _args;
	u_int32_t jib_flags;
	u_int64_t jib_offset;
	u_int64_t jib_size;
	const char *dev_name;

	devvp = hfsmp->hfs_devvp;
	dev_name = vnode_name(devvp);
	if (dev_name == NULL) {
		dev_name = "unknown-dev";
	}

	/* Honor journal tuning options only when extended mount args were given. */
	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
		arg_flags = args->journal_flags;
		arg_tbufsz = args->journal_tbuffer_size;
	}

	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;

	/* Read the Journal Info Block at the allocation block named in the
	 * volume header, converted to device logical blocks. */
	jinfo_bp = NULL;
	retval = (int)buf_meta_bread(devvp,
				     (daddr64_t)((embeddedOffset/blksize) +
						 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
				     hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
	if (retval) {
		if (jinfo_bp) {
			buf_brelse(jinfo_bp);
		}
		return retval;
	}

	/* On-disk JIB fields are big-endian. */
	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	jib_flags = SWAP_BE32(jibp->flags);
	jib_size = SWAP_BE64(jibp->size);

	if (jib_flags & kJIJournalInFSMask) {
		/* Journal lives inside this volume; journal on the same device. */
		hfsmp->jvp = hfsmp->hfs_devvp;
		jib_offset = SWAP_BE64(jibp->offset);
	} else {
		int need_init=0;

		// if the volume was unmounted cleanly then we'll pick any
		// available external journal partition
		//
		if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
			*((char *)&jibp->ext_jnl_uuid[0]) = '\0';
		}

		/* open_journal_dev() fills in ext_jnl_uuid with the uuid of
		 * whichever external partition it actually opened. */
		hfsmp->jvp = open_journal_dev(dev_name,
					      !(jib_flags & kJIJournalNeedInitMask),
					      (char *)&jibp->ext_jnl_uuid[0],
					      (char *)&jibp->machine_serial_num[0],
					      jib_size,
					      hfsmp->hfs_logical_block_size,
					      &need_init);
		if (hfsmp->jvp == NULL) {
			/* Required external journal device is missing: refuse
			 * to mount writable. */
			buf_brelse(jinfo_bp);
			return EROFS;
		} else {
			/* Record which machine owns this external journal. */
			if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
				strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
			}
		}

		/* External journals start at offset 0 of their own device. */
		jib_offset = 0;
		write_jibp = 1;		/* JIB was modified above; flush it later. */
		if (need_init) {
			jib_flags |= kJIJournalNeedInitMask;
		}
	}

	// save this off for the hack-y check in hfs_remove()
	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	hfsmp->jnl_size = jib_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
		// if the file system is read-only, check if the journal is empty.
		// if it is, then we can allow the mount.  otherwise we have to
		// return failure.
		retval = journal_is_clean(hfsmp->jvp,
					  jib_offset + embeddedOffset,
					  jib_size,
					  devvp,
					  hfsmp->hfs_logical_block_size);

		hfsmp->jnl = NULL;

		buf_brelse(jinfo_bp);

		if (retval) {
			const char *name = vnode_getname(devvp);
			printf("hfs: early journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
			       name ? name : "");
			if (name)
				vnode_putname(name);
		}

		return retval;
	}

	if (jib_flags & kJIJournalNeedInitMask) {
		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
		       jib_offset + embeddedOffset, jib_size);
		hfsmp->jnl = journal_create(hfsmp->jvp,
					    jib_offset + embeddedOffset,
					    jib_size,
					    devvp,
					    blksize,
					    arg_flags,
					    arg_tbufsz,
					    hfs_sync_metadata, hfsmp->hfs_mp);

		// no need to start a transaction here... if this were to fail
		// we'd just re-init it on the next mount.
		jib_flags &= ~kJIJournalNeedInitMask;
		jibp->flags = SWAP_BE32(jib_flags);
		buf_bwrite(jinfo_bp);		/* bwrite releases the buffer */
		jinfo_bp = NULL;
		jibp = NULL;
	} else {
		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
		//	   jib_offset + embeddedOffset,
		//	   jib_size, SWAP_BE32(vhp->blockSize));

		/* Opening replays any pending transactions in the journal. */
		hfsmp->jnl = journal_open(hfsmp->jvp,
					  jib_offset + embeddedOffset,
					  jib_size,
					  devvp,
					  blksize,
					  arg_flags,
					  arg_tbufsz,
					  hfs_sync_metadata, hfsmp->hfs_mp);

		if (write_jibp) {
			buf_bwrite(jinfo_bp);
		} else {
			buf_brelse(jinfo_bp);
		}
		jinfo_bp = NULL;
		jibp = NULL;

		if (hfsmp->jnl && mdbp) {
			// reload the mdb because it could have changed
			// if the journal had to be replayed.
			if (mdb_offset == 0) {
				mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
			}
			bp = NULL;
			retval = (int)buf_meta_bread(devvp,
						     HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
						     hfsmp->hfs_physical_block_size, cred, &bp);
			if (retval) {
				if (bp) {
					buf_brelse(bp);
				}
				printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
				       retval);
				return retval;
			}
			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
			buf_brelse(bp);
			bp = NULL;
		}
	}


	//printf("journal @ 0x%x\n", hfsmp->jnl);

	// if we expected the journal to be there and we couldn't
	// create it or open it then we have to bail out.
	if (hfsmp->jnl == NULL) {
		printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
		return EINVAL;
	}

	return 0;
}
2132
2133
2134 //
2135 // This function will go and re-locate the .journal_info_block and
2136 // the .journal files in case they moved (which can happen if you
2137 // run Norton SpeedDisk). If we fail to find either file we just
2138 // disable journaling for this volume and return. We turn off the
2139 // journaling bit in the vcb and assume it will get written to disk
2140 // later (if it doesn't on the next mount we'd do the same thing
2141 // again which is harmless). If we disable journaling we don't
2142 // return an error so that the volume is still mountable.
2143 //
2144 // If the info we find for the .journal_info_block and .journal files
2145 // isn't what we had stored, we re-set our cached info and proceed
2146 // with opening the journal normally.
2147 //
/*
 * hfs_late_journal_init
 *
 * Re-validate and open the journal once the catalog is available (see the
 * block comment above).  Returns 0 both on success and when journaling is
 * deliberately disabled (missing journal files, unreadable JIB, or the
 * journal_disable mount arg); returns an errno only for hard failures
 * (EROFS: missing external journal device or dirty journal on a read-only
 * mount; EINVAL: journal open/create failed).
 */
static int
hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
{
	JournalInfoBlock *jibp;
	struct buf *jinfo_bp;
	int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
	int retval, write_jibp = 0, recreate_journal = 0;
	struct vnode *devvp;
	struct cat_attr jib_attr, jattr;
	struct cat_fork jib_fork, jfork;
	ExtendedVCB *vcb;
	u_int32_t fid;
	struct hfs_mount_args *args = _args;
	u_int32_t jib_flags;
	u_int64_t jib_offset;
	u_int64_t jib_size;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
		if (args->journal_disable) {
			/* Caller explicitly requested a non-journaled mount. */
			return 0;
		}

		arg_flags = args->journal_flags;
		arg_tbufsz = args->journal_tbuffer_size;
	}

	/* Look up .journal_info_block in the catalog; if it's gone, turn off
	 * journaling rather than failing the mount. */
	fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
	if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
		printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
		       jib_fork.cf_extents[0].startBlock);
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}
	hfsmp->hfs_jnlinfoblkid = fid;

	// make sure the journal_info_block begins where we think it should.
	if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
		printf("hfs: The journal_info_block moved (was: %d; is: %d).  Fixing up\n",
		       SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);

		vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
		vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
		recreate_journal = 1;	/* old journal contents can't be trusted */
	}


	/* Read the Journal Info Block from its (possibly corrected) location. */
	sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
	jinfo_bp = NULL;
	retval = (int)buf_meta_bread(devvp,
				     (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
				      ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
				     hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
	if (retval) {
		if (jinfo_bp) {
			buf_brelse(jinfo_bp);
		}
		printf("hfs: can't read journal info block. disabling journaling.\n");
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}

	/* On-disk JIB fields are big-endian. */
	jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
	jib_flags = SWAP_BE32(jibp->flags);
	jib_offset = SWAP_BE64(jibp->offset);
	jib_size = SWAP_BE64(jibp->size);

	/* The .journal file itself must also exist in the catalog. */
	fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
	if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
		printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
		       jfork.cf_extents[0].startBlock);
		buf_brelse(jinfo_bp);
		vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
		return 0;
	}
	hfsmp->hfs_jnlfileid = fid;

	// make sure the journal file begins where we think it should.
	if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
		printf("hfs: The journal file moved (was: %lld; is: %d).  Fixing up\n",
		       (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);

		jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
		write_jibp = 1;
		recreate_journal = 1;
	}

	// check the size of the journal file.
	if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
		printf("hfs: The journal file changed size! (was %lld; is %lld).  Fixing up.\n",
		       jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);

		jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
		write_jibp = 1;
		recreate_journal = 1;
	}

	if (jib_flags & kJIJournalInFSMask) {
		hfsmp->jvp = hfsmp->hfs_devvp;
		/* Convert volume-relative to device-absolute byte offset. */
		jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
	} else {
		const char *dev_name;
		int need_init = 0;

		dev_name = vnode_name(devvp);
		if (dev_name == NULL) {
			dev_name = "unknown-dev";
		}

		// since the journal is empty, just use any available external journal
		*((char *)&jibp->ext_jnl_uuid[0]) = '\0';

		// this fills in the uuid of the device we actually get
		hfsmp->jvp = open_journal_dev(dev_name,
					      !(jib_flags & kJIJournalNeedInitMask),
					      (char *)&jibp->ext_jnl_uuid[0],
					      (char *)&jibp->machine_serial_num[0],
					      jib_size,
					      hfsmp->hfs_logical_block_size,
					      &need_init);
		if (hfsmp->jvp == NULL) {
			/* No external journal device available: can't mount writable. */
			buf_brelse(jinfo_bp);
			return EROFS;
		} else {
			/* Record which machine owns this external journal. */
			if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
				strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
			}
		}
		/* External journals start at offset 0 of their own device. */
		jib_offset = 0;
		recreate_journal = 1;
		write_jibp = 1;
		if (need_init) {
			jib_flags |= kJIJournalNeedInitMask;
		}
	}

	// save this off for the hack-y check in hfs_remove()
	hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
	hfsmp->jnl_size = jib_size;

	if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
		// if the file system is read-only, check if the journal is empty.
		// if it is, then we can allow the mount.  otherwise we have to
		// return failure.
		retval = journal_is_clean(hfsmp->jvp,
					  jib_offset,
					  jib_size,
					  devvp,
					  hfsmp->hfs_logical_block_size);

		hfsmp->jnl = NULL;

		buf_brelse(jinfo_bp);

		if (retval) {
			const char *name = vnode_getname(devvp);
			printf("hfs: late journal init: volume on %s is read-only and journal is dirty.  Can not mount volume.\n",
			       name ? name : "");
			if (name)
				vnode_putname(name);
		}

		return retval;
	}

	if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
		       jib_offset, jib_size);
		hfsmp->jnl = journal_create(hfsmp->jvp,
					    jib_offset,
					    jib_size,
					    devvp,
					    hfsmp->hfs_logical_block_size,
					    arg_flags,
					    arg_tbufsz,
					    hfs_sync_metadata, hfsmp->hfs_mp);

		// no need to start a transaction here... if this were to fail
		// we'd just re-init it on the next mount.
		jib_flags &= ~kJIJournalNeedInitMask;
		write_jibp = 1;

	} else {
		//
		// if we weren't the last person to mount this volume
		// then we need to throw away the journal because it
		// is likely that someone else mucked with the disk.
		// if the journal is empty this is no big deal.  if the
		// disk is dirty this prevents us from replaying the
		// journal over top of changes that someone else made.
		//
		arg_flags |= JOURNAL_RESET;

		//printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
		//	   jib_offset,
		//	   jib_size, SWAP_BE32(vhp->blockSize));

		hfsmp->jnl = journal_open(hfsmp->jvp,
					  jib_offset,
					  jib_size,
					  devvp,
					  hfsmp->hfs_logical_block_size,
					  arg_flags,
					  arg_tbufsz,
					  hfs_sync_metadata, hfsmp->hfs_mp);
	}


	/* Flush any JIB corrections back to disk (bwrite releases the buffer). */
	if (write_jibp) {
		jibp->flags  = SWAP_BE32(jib_flags);
		jibp->offset = SWAP_BE64(jib_offset);
		jibp->size   = SWAP_BE64(jib_size);

		buf_bwrite(jinfo_bp);
	} else {
		buf_brelse(jinfo_bp);
	}
	jinfo_bp = NULL;
	jibp = NULL;

	//printf("hfs: journal @ 0x%x\n", hfsmp->jnl);

	// if we expected the journal to be there and we couldn't
	// create it or open it then we have to bail out.
	if (hfsmp->jnl == NULL) {
		printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
		return EINVAL;
	}

	return 0;
}
2381
2382 /*
2383 * Calculate the allocation zone for metadata.
2384 *
2385 * This zone includes the following:
2386 * Allocation Bitmap file
2387 * Overflow Extents file
2388 * Journal file
2389 * Quota files
2390 * Clustered Hot files
2391 * Catalog file
2392 *
2393 * METADATA ALLOCATION ZONE
2394 * ____________________________________________________________________________
2395 * | | | | | | |
2396 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
2397 * |____|____|_____|_______________|______________________________|___________|
2398 *
2399 * <------------------------------- N * 128 MB ------------------------------->
2400 *
2401 */
2402 #define GIGABYTE (u_int64_t)(1024*1024*1024)
2403
2404 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2405 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2406 #define JOURNAL_DEFAULT_SIZE (8*1024*1024)
2407 #define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
2408 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
2409 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
2410
2411 void
2412 hfs_metadatazone_init(struct hfsmount *hfsmp)
2413 {
2414 ExtendedVCB *vcb;
2415 u_int64_t fs_size;
2416 u_int64_t zonesize;
2417 u_int64_t temp;
2418 u_int64_t filesize;
2419 u_int32_t blk;
2420 int items, really_do_it=1;
2421
2422 vcb = HFSTOVCB(hfsmp);
2423 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2424
2425 /*
2426 * For volumes less than 10 GB, don't bother.
2427 */
2428 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2429 really_do_it = 0;
2430 }
2431
2432 /*
2433 * Skip non-journaled volumes as well.
2434 */
2435 if (hfsmp->jnl == NULL) {
2436 really_do_it = 0;
2437 }
2438
2439 /*
2440 * Start with space for the boot blocks and Volume Header.
2441 * 1536 = byte offset from start of volume to end of volume header:
2442 * 1024 bytes is the offset from the start of the volume to the
2443 * start of the volume header (defined by the volume format)
2444 * + 512 bytes (the size of the volume header).
2445 */
2446 zonesize = roundup(1536, hfsmp->blockSize);
2447
2448 /*
2449 * Add the on-disk size of allocation bitmap.
2450 */
2451 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2452
2453 /*
2454 * Add space for the Journal Info Block and Journal (if they're in
2455 * this file system).
2456 */
2457 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
2458 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
2459 }
2460
2461 /*
2462 * Add the existing size of the Extents Overflow B-tree.
2463 * (It rarely grows, so don't bother reserving additional room for it.)
2464 */
2465 zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2466
2467 /*
2468 * If there is an Attributes B-tree, leave room for 11 clumps worth.
2469 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
2470 * When installing a full OS install onto a 20GB volume, we use
2471 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2472 * us with another 3 or 4 clumps worth before we need another extent.
2473 */
2474 if (hfsmp->hfs_attribute_cp) {
2475 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
2476 }
2477
2478 /*
2479 * Leave room for 11 clumps of the Catalog B-tree.
2480 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
2481 * When installing a full OS install onto a 20GB volume, we use
2482 * 7 to 8 clumps worth of space (depending on packages), so that leaves
2483 * us with another 3 or 4 clumps worth before we need another extent.
2484 */
2485 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
2486
2487 /*
2488 * Add space for hot file region.
2489 *
2490 * ...for now, use 5 MB per 1 GB (0.5 %)
2491 */
2492 filesize = (fs_size / 1024) * 5;
2493 if (filesize > HOTBAND_MAXIMUM_SIZE)
2494 filesize = HOTBAND_MAXIMUM_SIZE;
2495 else if (filesize < HOTBAND_MINIMUM_SIZE)
2496 filesize = HOTBAND_MINIMUM_SIZE;
2497 /*
2498 * Calculate user quota file requirements.
2499 */
2500 if (hfsmp->hfs_flags & HFS_QUOTAS) {
2501 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
2502 if (items < QF_MIN_USERS)
2503 items = QF_MIN_USERS;
2504 else if (items > QF_MAX_USERS)
2505 items = QF_MAX_USERS;
2506 if (!powerof2(items)) {
2507 int x = items;
2508 items = 4;
2509 while (x>>1 != 1) {
2510 x = x >> 1;
2511 items = items << 1;
2512 }
2513 }
2514 filesize += (items + 1) * sizeof(struct dqblk);
2515 /*
2516 * Calculate group quota file requirements.
2517 *
2518 */
2519 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
2520 if (items < QF_MIN_GROUPS)
2521 items = QF_MIN_GROUPS;
2522 else if (items > QF_MAX_GROUPS)
2523 items = QF_MAX_GROUPS;
2524 if (!powerof2(items)) {
2525 int x = items;
2526 items = 4;
2527 while (x>>1 != 1) {
2528 x = x >> 1;
2529 items = items << 1;
2530 }
2531 }
2532 filesize += (items + 1) * sizeof(struct dqblk);
2533 }
2534 zonesize += filesize;
2535
2536 /*
2537 * Round up entire zone to a bitmap block's worth.
2538 * The extra space goes to the catalog file and hot file area.
2539 */
2540 temp = zonesize;
2541 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
2542 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
2543 /*
2544 * If doing the round up for hfs_min_alloc_start would push us past
2545 * allocLimit, then just reset it back to 0. Though using a value
2546 * bigger than allocLimit would not cause damage in the block allocator
2547 * code, this value could get stored in the volume header and make it out
2548 * to disk, making the volume header technically corrupt.
2549 */
2550 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
2551 hfsmp->hfs_min_alloc_start = 0;
2552 }
2553
2554 if (really_do_it == 0) {
2555 /* If metadata zone needs to be disabled because the
2556 * volume was truncated, clear the bit and zero out
2557 * the values that are no longer needed.
2558 */
2559 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2560 /* Disable metadata zone */
2561 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
2562
2563 /* Zero out mount point values that are not required */
2564 hfsmp->hfs_catalog_maxblks = 0;
2565 hfsmp->hfs_hotfile_maxblks = 0;
2566 hfsmp->hfs_hotfile_start = 0;
2567 hfsmp->hfs_hotfile_end = 0;
2568 hfsmp->hfs_hotfile_freeblks = 0;
2569 hfsmp->hfs_metazone_start = 0;
2570 hfsmp->hfs_metazone_end = 0;
2571 }
2572
2573 return;
2574 }
2575
2576 temp = zonesize - temp; /* temp has extra space */
2577 filesize += temp / 3;
2578 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
2579
2580 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
2581
2582 /* Convert to allocation blocks. */
2583 blk = zonesize / vcb->blockSize;
2584
2585 /* The default metadata zone location is at the start of volume. */
2586 hfsmp->hfs_metazone_start = 1;
2587 hfsmp->hfs_metazone_end = blk - 1;
2588
2589 /* The default hotfile area is at the end of the zone. */
2590 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
2591 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
2592 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
2593 #if 0
2594 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
2595 printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
2596 printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
2597 #endif
2598 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
2599 }
2600
2601
2602 static u_int32_t
2603 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
2604 {
2605 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
2606 int lockflags;
2607 int freeblocks;
2608
2609 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2610 freeblocks = MetaZoneFreeBlocks(vcb);
2611 hfs_systemfile_unlock(hfsmp, lockflags);
2612
2613 /* Minus Extents overflow file reserve. */
2614 freeblocks -=
2615 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
2616 /* Minus catalog file reserve. */
2617 freeblocks -=
2618 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
2619 if (freeblocks < 0)
2620 freeblocks = 0;
2621
2622 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
2623 }
2624
2625 /*
2626 * Determine if a file is a "virtual" metadata file.
2627 * This includes journal and quota files.
2628 */
2629 __private_extern__
2630 int
2631 hfs_virtualmetafile(struct cnode *cp)
2632 {
2633 const char * filename;
2634
2635
2636 if (cp->c_parentcnid != kHFSRootFolderID)
2637 return (0);
2638
2639 filename = (const char *)cp->c_desc.cd_nameptr;
2640 if (filename == NULL)
2641 return (0);
2642
2643 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
2644 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
2645 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
2646 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
2647 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
2648 return (1);
2649
2650 return (0);
2651 }
2652
2653
2654 //
2655 // Fire off a timed callback to sync the disk if the
2656 // volume is on ejectable media.
2657 //
//
// Fire off a timed callback to sync the disk if the
// volume is on ejectable media.
//
// hfs_syncer is only non-NULL for such volumes; when it is NULL this
// function is a no-op.  hfs_sync_scheduled / hfs_sync_incomplete are
// manipulated only with atomic ops because this can race with the
// syncer callback itself.
//
__private_extern__
void
hfs_sync_ejectable(struct hfsmount *hfsmp)
{
	if (hfsmp->hfs_syncer) {
		clock_sec_t secs;
		clock_usec_t usecs;
		uint64_t now;

		/* Current calendar time in microseconds. */
		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;

		if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) {
			// if we have a sync scheduled but i/o is starting to pile up,
			// don't call thread_call_enter_delayed() again because that
			// will defer the sync.
			return;
		}

		if (hfsmp->hfs_sync_scheduled == 0) {
			uint64_t deadline;

			hfsmp->hfs_last_sync_request_time = now;

			clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);

			/*
			 * Increment hfs_sync_scheduled on the assumption that we're the
			 * first thread to schedule the timer.  If some other thread beat
			 * us, then we'll decrement it.  If we *were* the first to
			 * schedule the timer, then we need to keep track that the
			 * callback is waiting to complete.
			 */
			OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
			if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
				OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
			else
				OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		}
	}
}
2699
2700
/*
 * hfs_start_transaction
 *
 * Begin a (possibly nested) journal transaction on this volume.  Takes
 * the global lock shared and bumps hfs_active_threads unless this thread
 * already owns the journal (i.e. a nested transaction).  Fails with EROFS
 * while a read-write -> read-only downgrade is in progress (unless this
 * thread is the one performing the downgrade).
 *
 * Returns 0 on success; on failure any lock/refcount taken here is
 * released before returning.  Must be paired with hfs_end_transaction().
 */
__private_extern__
int
hfs_start_transaction(struct hfsmount *hfsmp)
{
	int ret, unlock_on_err=0;
	void * thread = current_thread();

#ifdef HFS_CHECK_LOCK_ORDER
	/*
	 * You cannot start a transaction while holding a system
	 * file lock. (unless the transaction is nested.)
	 */
	if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
		if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
		}
		if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
		}
		if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
			panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
		}
	}
#endif /* HFS_CHECK_LOCK_ORDER */

	/* Only the outermost transaction of a thread takes the global lock. */
	if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
		lck_rw_lock_shared(&hfsmp->hfs_global_lock);
		OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
		unlock_on_err = 1;
	}

	/* If a downgrade to read-only mount is in progress, no other
	 * process than the downgrade process is allowed to modify
	 * the file system.
	 */
	if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
	    (hfsmp->hfs_downgrading_proc != thread)) {
		ret = EROFS;
		goto out;
	}

	if (hfsmp->jnl) {
		ret = journal_start_transaction(hfsmp->jnl);
		if (ret == 0) {
			/* Track nesting depth; hfs_end_transaction decrements it. */
			OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
		}
	} else {
		/* Non-journaled volume: nothing else to do. */
		ret = 0;
	}

out:
	if (ret != 0 && unlock_on_err) {
		/* Undo the lock/refcount taken above on failure. */
		lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
	}

	return ret;
}
2759
/*
 * hfs_end_transaction
 *
 * End a transaction started with hfs_start_transaction().  The global
 * lock and hfs_active_threads reference are dropped only when this is
 * the outermost transaction for the thread (OSAddAtomic returns the
 * value prior to the add, so a result of 1 means the nesting count just
 * went from 1 to 0), or when the volume has no journal at all.
 * Returns the result of journal_end_transaction() (0 if non-journaled).
 */
__private_extern__
int
hfs_end_transaction(struct hfsmount *hfsmp)
{
	int need_unlock=0, ret;

	if (   hfsmp->jnl == NULL
	    || (   journal_owner(hfsmp->jnl) == current_thread()
		&& (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {

		need_unlock = 1;
	}

	if (hfsmp->jnl) {
		ret = journal_end_transaction(hfsmp->jnl);
	} else {
		ret = 0;
	}

	if (need_unlock) {
		/* Mirror of the acquisition order in hfs_start_transaction(). */
		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
		lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
		/* Kick the deferred sync for ejectable media now that we're idle. */
		hfs_sync_ejectable(hfsmp);
	}

	return ret;
}
2787
2788
2789 __private_extern__
2790 int
2791 hfs_journal_flush(struct hfsmount *hfsmp)
2792 {
2793 int ret;
2794
2795 /* Only peek at hfsmp->jnl while holding the global lock */
2796 lck_rw_lock_shared(&hfsmp->hfs_global_lock);
2797 if (hfsmp->jnl) {
2798 ret = journal_flush(hfsmp->jnl);
2799 } else {
2800 ret = 0;
2801 }
2802 lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
2803
2804 return ret;
2805 }
2806
2807
2808 /*
2809 * hfs_erase_unused_nodes
2810 *
2811 * Check wheter a volume may suffer from unused Catalog B-tree nodes that
2812 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
2813 * zeroes to the unused nodes.
2814 *
2815 * How do we detect when a volume needs this repair? We can't always be
2816 * certain. If a volume was created after a certain date, then it may have
2817 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
2818 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
2819 * that means that the entire first clump must have been written to, which means
2820 * there shouldn't be unused and unwritten nodes in that first clump, and this
2821 * repair is not needed.
2822 *
2823 * We have defined a bit in the Volume Header's attributes to indicate when the
2824 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
2825 * As will fsck_hfs when it repairs the unused nodes.
2826 */
2827 __private_extern__
2828 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
2829 {
2830 int result;
2831 struct filefork *catalog;
2832 int lockflags;
2833
2834 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
2835 {
2836 /* This volume has already been checked and repaired. */
2837 return 0;
2838 }
2839
2840 if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
2841 {
2842 /* This volume is too old to have had the problem. */
2843 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2844 return 0;
2845 }
2846
2847 catalog = hfsmp->hfs_catalog_cp->c_datafork;
2848 if (catalog->ff_size > catalog->ff_clumpsize)
2849 {
2850 /* The entire first clump must have been in use at some point. */
2851 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2852 return 0;
2853 }
2854
2855 /*
2856 * If we get here, we need to zero out those unused nodes.
2857 *
2858 * We start a transaction and lock the catalog since we're going to be
2859 * making on-disk changes. But note that BTZeroUnusedNodes doens't actually
2860 * do its writing via the journal, because that would be too much I/O
2861 * to fit in a transaction, and it's a pain to break it up into multiple
2862 * transactions. (It behaves more like growing a B-tree would.)
2863 */
2864 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
2865 result = hfs_start_transaction(hfsmp);
2866 if (result)
2867 goto done;
2868 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
2869 result = BTZeroUnusedNodes(catalog);
2870 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
2871 hfs_systemfile_unlock(hfsmp, lockflags);
2872 hfs_end_transaction(hfsmp);
2873 if (result == 0)
2874 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
2875 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
2876
2877 done:
2878 return result;
2879 }